From 8b14b7175f8ce1e0d21761af4c6853b20e44c233 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 19 Jun 2026 17:54:26 +0200 Subject: [PATCH 1/4] [SPARK-57526][SQL] Add the `timestamp_nanos` function to create nanosecond-precision timestamps from numeric nanoseconds ### What changes were proposed in this pull request? This PR adds a new built-in function `timestamp_nanos(expr)` that interprets `expr` as the number of nanoseconds since `1970-01-01 00:00:00 UTC` and returns a nanosecond-precision `TIMESTAMP_LTZ(9)`. Concretely: - Adds a `NanosToTimestamp` expression in `datetimeExpressions.scala`. It declares a single `DECIMAL` input type with `ImplicitCastInputTypes`, so integral arguments are coerced to their natural decimal automatically while `DECIMAL` arguments are accepted as-is. - Maps the nanosecond count `N` to the internal `(epochMicros, nanosWithinMicro)` pair with floor semantics (`epochMicros = floorDiv(N, 1000)`, `nanosWithinMicro = floorMod(N, 1000)`, always in `[0, 999]`), computed via `BigInteger` in both the interpreted (`eval`) and codegen (`doGenCode`) paths. `longValueExact` throws `ArithmeticException` when the value is outside the representable timestamp range. - A `DECIMAL` input (rather than `BIGINT`) is required to reach the full `[0001, 9999]` calendar range: nanoseconds for year 9999 (~2.5e20) overflow a 64-bit `BIGINT`, the same reason the inverse `unix_nanos` returns `DECIMAL(21, 0)`. - Registers `timestamp_nanos` in `FunctionRegistry` and adds the Scala `functions.timestamp_nanos`. - Adds catalyst unit tests (interpreted + codegen, full-range and round-trip with `unix_nanos`, overflow), Scala/SQL end-to-end tests, and SQL golden-file coverage. Scope notes: the PySpark API (classic and Spark Connect Python) and R are out of scope here and tracked as follow-ups; `timestamp_nanos` is recorded in the PySpark function-parity allowlist in the meantime. 
The Scala Spark Connect client picks up `timestamp_nanos` automatically because `functions.scala` lives in the shared `sql/api` module. ### Why are the changes needed? Part of the [SPARK-56822](https://issues.apache.org/jira/browse/SPARK-56822) umbrella (timestamps with nanosecond precision). Spark has `timestamp_seconds` / `timestamp_millis` / `timestamp_micros` but no nanosecond counterpart; `timestamp_nanos` fills that gap and is the natural inverse of `unix_nanos`. ### Does this PR introduce _any_ user-facing change? Yes. A new `timestamp_nanos(expr)` function is available in SQL and the Scala API (including the Scala Spark Connect client). It returns `TIMESTAMP_LTZ(9)`. This is a change only within the unreleased nanosecond-timestamp preview. Example (the result is rendered in the session time zone, UTC-8 here; the underlying instant is 2008-12-25 15:30:00.123456789 UTC): ```sql SELECT timestamp_nanos(1230219000123456789); -- 2008-12-25 07:30:00.123456789 ``` ### How was this patch tested? - `build/sbt 'catalyst/testOnly org.apache.spark.sql.catalyst.expressions.DateExpressionsSuite'` - `build/sbt 'sql/testOnly org.apache.spark.sql.TimestampNanosFunctionsAnsiOnSuite org.apache.spark.sql.TimestampNanosFunctionsAnsiOffSuite'` - `build/sbt 'sql/testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite org.apache.spark.sql.ExpressionsSchemaSuite'` - `SPARK_GENERATE_GOLDEN_FILES=1 build/sbt 'sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z "nanos"'` - `./dev/scalastyle` ### Was this patch authored or co-authored using generative AI tooling? 
Generated-by: Cursor --- python/pyspark/sql/tests/test_functions.py | 1 + .../org/apache/spark/sql/functions.scala | 9 +++ .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 60 +++++++++++++++++++ .../expressions/DateExpressionsSuite.scala | 40 +++++++++++++ .../sql-functions/sql-expression-schema.md | 1 + .../timestamp-ltz-nanos.sql.out | 28 +++++++++ .../sql-tests/inputs/timestamp-ltz-nanos.sql | 10 ++++ .../results/timestamp-ltz-nanos.sql.out | 32 ++++++++++ .../TimestampNanosFunctionsSuiteBase.scala | 35 +++++++++++ 10 files changed, 217 insertions(+) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 10aa01e5a600..c9ca0fca96a7 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -84,6 +84,7 @@ def test_function_parity(self): # Functions that we expect to be missing in python until they are added to pyspark expected_missing_in_py = { "unix_nanos", # SPARK-57527: PySpark support tracked as a follow-up + "timestamp_nanos", # SPARK-57526: PySpark support tracked as a follow-up } self.assertEqual( diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 76748f0ae942..8aea50291cdc 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -8569,6 +8569,15 @@ object functions { */ def timestamp_micros(e: Column): Column = Column.fn("timestamp_micros", e) + /** + * Creates a timestamp with the local time zone and nanosecond precision (TIMESTAMP_LTZ(9)) from + * the number of nanoseconds since UTC epoch. + * + * @group datetime_funcs + * @since 4.3.0 + */ + def timestamp_nanos(e: Column): Column = Column.fn("timestamp_nanos", e) + /** * Gets the difference between the timestamps in the specified units by truncating the fraction * part. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 2c47fca543a9..415a842c9bf4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -774,6 +774,7 @@ object FunctionRegistry { expression[SecondsToTimestamp]("timestamp_seconds"), expression[MillisToTimestamp]("timestamp_millis"), expression[MicrosToTimestamp]("timestamp_micros"), + expression[NanosToTimestamp]("timestamp_nanos"), expression[UnixSeconds]("unix_seconds"), expression[UnixMillis]("unix_millis"), expression[UnixMicros]("unix_micros"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 3fbef82ef246..48d54cff8cba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -759,6 +759,66 @@ case class MicrosToTimestamp(child: Expression) copy(child = newChild) } +// scalastyle:off line.size.limit line.contains.tab +@ExpressionDescription( + usage = "_FUNC_(nanoseconds) - Creates timestamp with the local time zone and nanosecond precision (TIMESTAMP_LTZ(9)) from the number of nanoseconds since UTC epoch.", + examples = """ + Examples: + > SET spark.sql.timestampNanosTypes.enabled=true; + spark.sql.timestampNanosTypes.enabled true + > SELECT _FUNC_(1230219000123456789); + 2008-12-25 07:30:00.123456789 + """, + group = "datetime_funcs", + since = "4.3.0") +// scalastyle:on line.size.limit line.contains.tab +case class NanosToTimestamp(child: Expression) + extends UnaryExpression with ImplicitCastInputTypes { 
+ override def nullIntolerant: Boolean = true + + // A nanosecond count needs DECIMAL to span the full [0001, 9999] calendar range: nanos for year + // 9999 (~2.5e20) overflows a 64-bit BIGINT, the same reason the inverse `unix_nanos` returns + // DECIMAL(21, 0). ImplicitCastInputTypes coerces integral arguments to their natural decimal, so + // an ordinary BIGINT argument still works while DECIMAL literals reach the whole range. + override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType) + + override def dataType: DataType = TimestampLTZNanosType(9) + + // Maps the integer nanosecond count to the (epochMicros, nanosWithinMicro) pair with floor + // semantics, so the sub-microsecond remainder is always in [0, 999] (matching the negative-input + // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64 + // bits, i.e. the input is outside the representable timestamp range. + override def nullSafeEval(input: Any): Any = { + val n = input.asInstanceOf[Decimal].toJavaBigDecimal + .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger + val thousand = BigInteger.valueOf(NANOS_PER_MICROS) + val rem = n.mod(thousand) + val micros = n.subtract(rem).divide(thousand).longValueExact() + TimestampNanosVal.fromParts(micros, rem.shortValueExact()) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + nullSafeCodeGen(ctx, ev, c => { + val n = ctx.freshName("nanos") + val thousand = ctx.freshName("thousand") + val rem = ctx.freshName("rem") + s""" + |java.math.BigInteger $n = $c.toJavaBigDecimal() + | .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger(); + |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L); + |java.math.BigInteger $rem = $n.mod($thousand); + |${ev.value} = org.apache.spark.unsafe.types.TimestampNanosVal.fromParts( + | $n.subtract($rem).divide($thousand).longValueExact(), $rem.shortValueExact()); + |""".stripMargin + }) + } + + override def 
prettyName: String = "timestamp_nanos" + + override protected def withNewChildInternal(newChild: Expression): NanosToTimestamp = + copy(child = newChild) +} + abstract class TimestampToLongBase extends UnaryExpression with ExpectsInputTypes { override def nullIntolerant: Boolean = true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8771123ad120..6e0c73fb9ae3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1743,6 +1743,46 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from nanoseconds") { + import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._ + + // The child is a DECIMAL after analysis (ImplicitCastInputTypes coerces integral arguments); + // build the post-coercion literal directly. A wide DECIMAL(38, 0) holds every input below. + def tsNanos(n: BigInt): NanosToTimestamp = + NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0), DecimalType(38, 0))) + + assert(tsNanos(0).dataType === TimestampLTZNanosType(9)) + + // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros + 789 ns. + checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789)) + + // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]: + // -1 ns floors to epochMicros = -1 with a 999 ns remainder. + checkEvaluation(tsNanos(BigInt(-1)), nanosVal(-1L, 999)) + checkEvaluation(tsNanos(BigInt(-1000)), nanosVal(-1L, 0)) + checkEvaluation(tsNanos(BigInt(-1500)), nanosVal(-2L, 500)) + + // NULL input. 
+ checkEvaluation( + NanosToTimestamp(Literal.create(null, DecimalType(38, 0))), null) + + // Full [0001, 9999] range: a DECIMAL nanosecond count far beyond a 64-bit BIGINT decodes + // losslessly back to the original value (proving the function spans the whole calendar range). + Seq( + localDateTimeToNanosVal(timestampNTZ(9999, 12, 31, 23, 59, 59, 999999999)), + localDateTimeToNanosVal(timestampNTZ(1, 1, 1, 0, 0, 0, 1)) + ).foreach { v => + val n = BigInt(v.epochMicros) * NANOS_PER_MICROS + v.nanosWithinMicro.toInt + checkEvaluation(tsNanos(n), v) + // Round-trips with the inverse unix_nanos for the same full-range values. + checkEvaluation(UnixNanos(tsNanos(n)), Decimal(BigDecimal(n), 21, 0)) + } + + // Out-of-range input: epochMicros overflows a 64-bit long, so longValueExact throws. + checkExceptionInExpression[ArithmeticException]( + tsNanos(BigInt("10000000000000000000000000")), "out of long range") + } + test("TIMESTAMP_SECONDS") { def testIntegralFunc(value: Number): Unit = { checkEvaluation( diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 3ff81b7f57f0..6297aece4cbb 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -255,6 +255,7 @@ | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct | | org.apache.spark.sql.catalyst.expressions.NTile | ntile | SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct | +| org.apache.spark.sql.catalyst.expressions.NanosToTimestamp | timestamp_nanos | SELECT timestamp_nanos(1230219000123456789) | struct | | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT 
next_day('2015-01-14', 'TU') | struct | | org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index a4dadf760088..d8e4fd5e7dce 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -762,3 +762,31 @@ SELECT unix_nanos(NULL :: timestamp_ltz(9)) -- !query analysis Project [unix_nanos(cast(null as timestamp_ltz(9))) AS unix_nanos(CAST(NULL AS TIMESTAMP_LTZ(9)))#x] +- OneRowRelation + + +-- !query +SELECT timestamp_nanos(1230219000123456789) +-- !query analysis +Project [timestamp_nanos(cast(1230219000123456789 as decimal(20,0))) AS timestamp_nanos(1230219000123456789)#x] ++- OneRowRelation + + +-- !query +SELECT timestamp_nanos(-1) +-- !query analysis +Project [timestamp_nanos(cast(-1 as decimal(10,0))) AS timestamp_nanos(-1)#x] ++- OneRowRelation + + +-- !query +SELECT timestamp_nanos(253402300799999999999BD) +-- !query analysis +Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(253402300799999999999)#x] ++- OneRowRelation + + +-- !query +SELECT timestamp_nanos(CAST(NULL AS BIGINT)) +-- !query analysis +Project [timestamp_nanos(cast(cast(null as bigint) as decimal(20,0))) AS timestamp_nanos(CAST(NULL AS BIGINT))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index e208704196ba..b8dc5c47eb56 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -215,3 +215,13 @@ SELECT 
unix_nanos(TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999 UTC'); SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC'); -- NULL nanosecond timestamp. SELECT unix_nanos(NULL :: timestamp_ltz(9)); + +-- SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from a nanosecond count since the epoch. +-- Integral arguments are implicitly cast to DECIMAL; the LTZ result renders in the session zone. +SELECT timestamp_nanos(1230219000123456789); +-- Negative input floors toward the past, so the sub-microsecond remainder stays in [0, 999]. +SELECT timestamp_nanos(-1); +-- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20). +SELECT timestamp_nanos(253402300799999999999BD); +-- NULL input. +SELECT timestamp_nanos(CAST(NULL AS BIGINT)); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 1f75f01da848..19fda1102997 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -854,3 +854,35 @@ SELECT unix_nanos(NULL :: timestamp_ltz(9)) struct -- !query output NULL + + +-- !query +SELECT timestamp_nanos(1230219000123456789) +-- !query schema +struct +-- !query output +2008-12-25 07:30:00.123456789 + + +-- !query +SELECT timestamp_nanos(-1) +-- !query schema +struct +-- !query output +1969-12-31 15:59:59.999999999 + + +-- !query +SELECT timestamp_nanos(253402300799999999999BD) +-- !query schema +struct +-- !query output +9999-12-31 15:59:59.999999999 + + +-- !query +SELECT timestamp_nanos(CAST(NULL AS BIGINT)) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index da2e9d3a8d88..b94aa4646667 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -481,6 +481,41 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { checkAnswer(ltz.select(unix_nanos(col("c"))), Row(null)) } } + + test("SPARK-57526: timestamp_nanos builds nanosecond-precision TIMESTAMP_LTZ values") { + // 1230219000123456789 ns since the epoch -> 2008-12-25 15:30:00.123456789 UTC. The result is a + // TIMESTAMP_LTZ(9); collecting it yields the absolute Instant regardless of the session zone. + val nanos = 1230219000123456789L + val instant = Instant.parse("2008-12-25T15:30:00.123456789Z") + val sqlRes = spark.sql(s"SELECT timestamp_nanos($nanos)") + val colRes = spark.range(1).select(timestamp_nanos(lit(nanos))) + // The SQL and Scala Column API agree, return the expected instant, and keep the LTZ(9) type. + checkAnswer(sqlRes, colRes) + checkAnswer(sqlRes, Row(instant)) + assert(sqlRes.schema.head.dataType === TimestampLTZNanosType(9)) + + // A BIGINT argument is implicitly cast to DECIMAL, so the integral literal works directly. + checkAnswer(spark.sql(s"SELECT timestamp_nanos(${nanos}L)"), Row(instant)) + + // DECIMAL input reaches the full [0001, 9999] calendar range, beyond a 64-bit BIGINT of nanos. 
+ Seq( + Instant.parse("9999-12-31T23:59:59.999999999Z"), + Instant.parse("0001-01-01T00:00:00.000000001Z") + ).foreach { i => + val n = BigInt(i.getEpochSecond) * 1000000000L + i.getNano + checkAnswer( + spark.range(1).select(timestamp_nanos(lit(BigDecimal(n).bigDecimal))), + Row(i)) + } + } + + test("SPARK-57526: timestamp_nanos over NULL input") { + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(null))), + new StructType().add("n", LongType)) + checkAnswer(df.select(timestamp_nanos(col("n"))), Row(null)) + checkAnswer(df.selectExpr("timestamp_nanos(n)"), Row(null)) + } } // Runs the nanosecond timestamp function tests with ANSI mode enabled explicitly. From e56f3b63a4d750e5a80d708be0a31806c7799836 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 19 Jun 2026 18:45:00 +0200 Subject: [PATCH 2/4] [SPARK-57526][SQL] Reject FLOAT/DOUBLE/STRING from timestamp_nanos at analysis `NanosToTimestamp` declared `inputTypes = Seq(DecimalType)` with `ImplicitCastInputTypes`, which silently coerced FLOAT/DOUBLE/STRING to DECIMAL(14,7)/(30,15)/(38,18). Those targets hold far fewer integer digits than a realistic nanosecond count, so a finite FLOAT/DOUBLE argument overflowed the coerced decimal and yielded NULL (ANSI off) or an overflow error (ANSI on) instead of a timestamp -- contrary to the documented "accepted and floored" behavior. Switch to `ExpectsInputTypes` with `Seq(TypeCollection(IntegralType, DecimalType))` so only integral and DECIMAL nanosecond counts are accepted; FLOAT/DOUBLE/STRING now fail at analysis with a clear DATATYPE_MISMATCH, matching the "count of time units" semantics of timestamp_micros/millis. The interpreted and codegen paths widen an integral argument to BigInteger directly and keep the DECIMAL floor path unchanged. Add catalyst coverage for the integral path and the FLOAT/DOUBLE/STRING rejection, a SQL rejection case, and regenerate the golden files. 
Co-authored-by: Isaac --- .../expressions/datetimeExpressions.scala | 33 +++++++++++++------ .../expressions/DateExpressionsSuite.scala | 16 +++++++-- .../timestamp-ltz-nanos.sql.out | 30 +++++++++++++++-- .../sql-tests/inputs/timestamp-ltz-nanos.sql | 4 ++- .../results/timestamp-ltz-nanos.sql.out | 26 +++++++++++++++ 5 files changed, 93 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 48d54cff8cba..f097783f42d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -773,14 +773,17 @@ case class MicrosToTimestamp(child: Expression) since = "4.3.0") // scalastyle:on line.size.limit line.contains.tab case class NanosToTimestamp(child: Expression) - extends UnaryExpression with ImplicitCastInputTypes { + extends UnaryExpression with ExpectsInputTypes { override def nullIntolerant: Boolean = true - // A nanosecond count needs DECIMAL to span the full [0001, 9999] calendar range: nanos for year - // 9999 (~2.5e20) overflows a 64-bit BIGINT, the same reason the inverse `unix_nanos` returns - // DECIMAL(21, 0). ImplicitCastInputTypes coerces integral arguments to their natural decimal, so - // an ordinary BIGINT argument still works while DECIMAL literals reach the whole range. - override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType) + // Accepts an integral or DECIMAL nanosecond count only. DECIMAL is required to span the full + // [0001, 9999] calendar range: nanos for year 9999 (~2.5e20) overflow a 64-bit BIGINT, the same + // reason the inverse `unix_nanos` returns DECIMAL(21, 0); an integral argument is widened to + // BigInteger directly. 
FLOAT/DOUBLE/STRING are intentionally rejected at analysis rather than + // implicitly coerced: a fractional or string nanosecond count is not meaningful, and the implicit + // DECIMAL coercion (FLOAT -> DECIMAL(14, 7), DOUBLE -> DECIMAL(30, 15)) would silently overflow + // for realistic magnitudes. + override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(IntegralType, DecimalType)) override def dataType: DataType = TimestampLTZNanosType(9) @@ -789,8 +792,13 @@ case class NanosToTimestamp(child: Expression) // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64 // bits, i.e. the input is outside the representable timestamp range. override def nullSafeEval(input: Any): Any = { - val n = input.asInstanceOf[Decimal].toJavaBigDecimal - .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger + val n = child.dataType match { + case _: DecimalType => + input.asInstanceOf[Decimal].toJavaBigDecimal + .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger + case _: IntegralType => + BigInteger.valueOf(input.asInstanceOf[Number].longValue()) + } val thousand = BigInteger.valueOf(NANOS_PER_MICROS) val rem = n.mod(thousand) val micros = n.subtract(rem).divide(thousand).longValueExact() @@ -802,9 +810,14 @@ case class NanosToTimestamp(child: Expression) val n = ctx.freshName("nanos") val thousand = ctx.freshName("thousand") val rem = ctx.freshName("rem") + val toBigInteger = child.dataType match { + case _: DecimalType => + s"$c.toJavaBigDecimal().setScale(0, java.math.RoundingMode.FLOOR).toBigInteger()" + case _: IntegralType => + s"java.math.BigInteger.valueOf((long) $c)" + } s""" - |java.math.BigInteger $n = $c.toJavaBigDecimal() - | .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger(); + |java.math.BigInteger $n = $toBigInteger; |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L); |java.math.BigInteger $rem = $n.mod($thousand); |${ev.value} = 
org.apache.spark.unsafe.types.TimestampNanosVal.fromParts( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 6e0c73fb9ae3..0e75c2eab81d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1746,8 +1746,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from nanoseconds") { import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._ - // The child is a DECIMAL after analysis (ImplicitCastInputTypes coerces integral arguments); - // build the post-coercion literal directly. A wide DECIMAL(38, 0) holds every input below. + // DECIMAL input is accepted as-is; a wide DECIMAL(38, 0) holds every input below. def tsNanos(n: BigInt): NanosToTimestamp = NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0), DecimalType(38, 0))) @@ -1756,6 +1755,19 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros + 789 ns. checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789)) + // An integral argument is accepted directly (widened to BigInteger), exercising the + // IntegralType eval/codegen path rather than the DECIMAL one. 
+ checkEvaluation( + NanosToTimestamp(Literal(1230219000123456789L)), nanosVal(1230219000123456L, 789)) + checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999)) + checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0)) + + // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not + // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values. + Seq(Literal(1.0f), Literal(1.0d), Literal("1")).foreach { lit => + assert(NanosToTimestamp(lit).checkInputDataTypes().isFailure) + } + // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]: // -1 ns floors to epochMicros = -1 with a 999 ns remainder. checkEvaluation(tsNanos(BigInt(-1)), nanosVal(-1L, 999)) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index d8e4fd5e7dce..9bfec7b8b6e1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -767,14 +767,14 @@ Project [unix_nanos(cast(null as timestamp_ltz(9))) AS unix_nanos(CAST(NULL AS T -- !query SELECT timestamp_nanos(1230219000123456789) -- !query analysis -Project [timestamp_nanos(cast(1230219000123456789 as decimal(20,0))) AS timestamp_nanos(1230219000123456789)#x] +Project [timestamp_nanos(1230219000123456789) AS timestamp_nanos(1230219000123456789)#x] +- OneRowRelation -- !query SELECT timestamp_nanos(-1) -- !query analysis -Project [timestamp_nanos(cast(-1 as decimal(10,0))) AS timestamp_nanos(-1)#x] +Project [timestamp_nanos(-1) AS timestamp_nanos(-1)#x] +- OneRowRelation @@ -785,8 +785,32 @@ Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(2534023007999 +- OneRowRelation +-- !query +SELECT timestamp_nanos(1.0D) +-- !query analysis 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"1.0\"", + "inputType" : "\"DOUBLE\"", + "paramIndex" : "first", + "requiredType" : "(\"INTEGRAL\" or \"DECIMAL\")", + "sqlExpr" : "\"timestamp_nanos(1.0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "timestamp_nanos(1.0D)" + } ] +} + + -- !query SELECT timestamp_nanos(CAST(NULL AS BIGINT)) -- !query analysis -Project [timestamp_nanos(cast(cast(null as bigint) as decimal(20,0))) AS timestamp_nanos(CAST(NULL AS BIGINT))#x] +Project [timestamp_nanos(cast(null as bigint)) AS timestamp_nanos(CAST(NULL AS BIGINT))#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index b8dc5c47eb56..6d7c8ed3f65d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -217,11 +217,13 @@ SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC'); SELECT unix_nanos(NULL :: timestamp_ltz(9)); -- SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from a nanosecond count since the epoch. --- Integral arguments are implicitly cast to DECIMAL; the LTZ result renders in the session zone. +-- An integral argument is accepted directly; the LTZ result renders in the session zone. SELECT timestamp_nanos(1230219000123456789); -- Negative input floors toward the past, so the sub-microsecond remainder stays in [0, 999]. SELECT timestamp_nanos(-1); -- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20). SELECT timestamp_nanos(253402300799999999999BD); +-- DOUBLE is rejected at analysis: only integral and DECIMAL nanosecond counts are accepted. 
+SELECT timestamp_nanos(1.0D); -- NULL input. SELECT timestamp_nanos(CAST(NULL AS BIGINT)); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 19fda1102997..dbb732a33ef5 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -880,6 +880,32 @@ struct 9999-12-31 15:59:59.999999999 +-- !query +SELECT timestamp_nanos(1.0D) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"1.0\"", + "inputType" : "\"DOUBLE\"", + "paramIndex" : "first", + "requiredType" : "(\"INTEGRAL\" or \"DECIMAL\")", + "sqlExpr" : "\"timestamp_nanos(1.0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "timestamp_nanos(1.0D)" + } ] +} + + -- !query SELECT timestamp_nanos(CAST(NULL AS BIGINT)) -- !query schema From 0a83affa7b22beb2f122dc292d54001e13599374 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 19 Jun 2026 19:01:54 +0200 Subject: [PATCH 3/4] [SPARK-57526][SQL] Raise DATETIME_OVERFLOW for timestamp_nanos overflow and add negative tests `NanosToTimestamp` let `BigInteger.longValueExact()` throw a raw `java.lang.ArithmeticException` when `epochMicros` overflows a 64-bit long. Surface it instead as a proper Spark error condition: add `QueryExecutionErrors.timestampNanosOverflowError`, which raises a `SparkArithmeticException` with the `DATETIME_OVERFLOW` condition (SQLSTATE 22008), and catch/rethrow in both the interpreted and codegen paths. 
Strengthen the negative coverage: the catalyst FLOAT/DOUBLE/STRING rejection now asserts the `UNEXPECTED_INPUT_TYPE` `DataTypeMismatch` (not just `isFailure`), the overflow test asserts the `DATETIME_OVERFLOW` condition via `checkErrorInExpression`, and a SQL golden case exercises the runtime overflow end-to-end. Regenerate the golden files. Co-authored-by: Isaac --- .../expressions/datetimeExpressions.scala | 21 +++++++++++++++---- .../sql/errors/QueryExecutionErrors.scala | 10 +++++++++ .../expressions/DateExpressionsSuite.scala | 12 +++++++---- .../timestamp-ltz-nanos.sql.out | 7 +++++++ .../sql-tests/inputs/timestamp-ltz-nanos.sql | 2 ++ .../results/timestamp-ltz-nanos.sql.out | 15 +++++++++++++ 6 files changed, 59 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index f097783f42d5..80505f77125f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -789,8 +789,9 @@ case class NanosToTimestamp(child: Expression) // Maps the integer nanosecond count to the (epochMicros, nanosWithinMicro) pair with floor // semantics, so the sub-microsecond remainder is always in [0, 999] (matching the negative-input - // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64 - // bits, i.e. the input is outside the representable timestamp range. + // behavior of `floorDiv`/`floorMod`). When `epochMicros` overflows 64 bits -- i.e. the input is + // outside the representable timestamp range -- `longValueExact` throws, which is surfaced as a + // DATETIME_OVERFLOW error. 
override def nullSafeEval(input: Any): Any = { val n = child.dataType match { case _: DecimalType => @@ -801,7 +802,11 @@ case class NanosToTimestamp(child: Expression) } val thousand = BigInteger.valueOf(NANOS_PER_MICROS) val rem = n.mod(thousand) - val micros = n.subtract(rem).divide(thousand).longValueExact() + val micros = try { + n.subtract(rem).divide(thousand).longValueExact() + } catch { + case _: ArithmeticException => throw QueryExecutionErrors.timestampNanosOverflowError(n) + } TimestampNanosVal.fromParts(micros, rem.shortValueExact()) } @@ -810,18 +815,26 @@ case class NanosToTimestamp(child: Expression) val n = ctx.freshName("nanos") val thousand = ctx.freshName("thousand") val rem = ctx.freshName("rem") + val micros = ctx.freshName("micros") val toBigInteger = child.dataType match { case _: DecimalType => s"$c.toJavaBigDecimal().setScale(0, java.math.RoundingMode.FLOOR).toBigInteger()" case _: IntegralType => s"java.math.BigInteger.valueOf((long) $c)" } + val errors = QueryExecutionErrors.getClass.getName.stripSuffix("$") s""" |java.math.BigInteger $n = $toBigInteger; |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L); |java.math.BigInteger $rem = $n.mod($thousand); + |long $micros; + |try { + | $micros = $n.subtract($rem).divide($thousand).longValueExact(); + |} catch (java.lang.ArithmeticException e) { + | throw $errors.timestampNanosOverflowError($n); + |} |${ev.value} = org.apache.spark.unsafe.types.TimestampNanosVal.fromParts( - | $n.subtract($rem).divide($thousand).longValueExact(), $rem.shortValueExact()); + | $micros, $rem.shortValueExact()); |""".stripMargin }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 48c3ef0c6a93..f4db9c9041f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2646,6 +2646,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = "") } + def timestampNanosOverflowError(nanos: java.math.BigInteger): SparkArithmeticException = { + new SparkArithmeticException( + errorClass = "DATETIME_OVERFLOW", + messageParameters = Map( + "operation" -> + s"create a TIMESTAMP_LTZ(9) from $nanos nanoseconds since the epoch"), + context = Array.empty, + summary = "") + } + def timeAddIntervalOverflowError( time: Long, timePrecision: Int, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 0e75c2eab81d..a23bb4fc723b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1765,7 +1765,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values. Seq(Literal(1.0f), Literal(1.0d), Literal("1")).foreach { lit => - assert(NanosToTimestamp(lit).checkInputDataTypes().isFailure) + val mismatch = NanosToTimestamp(lit).checkInputDataTypes().asInstanceOf[DataTypeMismatch] + assert(mismatch.errorSubClass == "UNEXPECTED_INPUT_TYPE") } // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]: @@ -1790,9 +1791,12 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(UnixNanos(tsNanos(n)), Decimal(BigDecimal(n), 21, 0)) } - // Out-of-range input: epochMicros overflows a 64-bit long, so longValueExact throws. 
- checkExceptionInExpression[ArithmeticException]( - tsNanos(BigInt("10000000000000000000000000")), "out of long range") + // Out-of-range input: epochMicros overflows a 64-bit long, surfaced as DATETIME_OVERFLOW. + checkErrorInExpression[SparkArithmeticException]( + tsNanos(BigInt("10000000000000000000000000")), + condition = "DATETIME_OVERFLOW", + parameters = Map("operation" -> + "create a TIMESTAMP_LTZ(9) from 10000000000000000000000000 nanoseconds since the epoch")) } test("TIMESTAMP_SECONDS") { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index 9bfec7b8b6e1..9c15c197d8e7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -785,6 +785,13 @@ Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(2534023007999 +- OneRowRelation +-- !query +SELECT timestamp_nanos(10000000000000000000000000BD) +-- !query analysis +Project [timestamp_nanos(10000000000000000000000000) AS timestamp_nanos(10000000000000000000000000)#x] ++- OneRowRelation + + -- !query SELECT timestamp_nanos(1.0D) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index 6d7c8ed3f65d..bad3c1aee842 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -223,6 +223,8 @@ SELECT timestamp_nanos(1230219000123456789); SELECT timestamp_nanos(-1); -- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20). SELECT timestamp_nanos(253402300799999999999BD); +-- Out-of-range input: epochMicros overflows a 64-bit long, so the conversion fails at runtime. 
+SELECT timestamp_nanos(10000000000000000000000000BD); -- DOUBLE is rejected at analysis: only integral and DECIMAL nanosecond counts are accepted. SELECT timestamp_nanos(1.0D); -- NULL input. diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index dbb732a33ef5..84987fcb433c 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -880,6 +880,21 @@ struct 9999-12-31 15:59:59.999999999 +-- !query +SELECT timestamp_nanos(10000000000000000000000000BD) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "DATETIME_OVERFLOW", + "sqlState" : "22008", + "messageParameters" : { + "operation" : "create a TIMESTAMP_LTZ(9) from 10000000000000000000000000 nanoseconds since the epoch" + } +} + + -- !query SELECT timestamp_nanos(1.0D) -- !query schema From e81da360031da79a5e98a8d1f654e9d974efc3b7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 20 Jun 2026 09:30:39 +0200 Subject: [PATCH 4/4] [SPARK-57526][SQL] Address review feedback on timestamp_nanos - Fix a stale test comment that still claimed a BIGINT argument is implicitly cast to DECIMAL; after the switch to ExpectsInputTypes it goes through the dedicated IntegralType path (widened to BigInteger), so the comment is updated to match. - Document that, like timestamp_micros/millis/seconds, NanosToTimestamp does not validate the [0001, 9999] calendar range: only the 64-bit epochMicros boundary is guarded (counts up to ~year 294247 are accepted), which is intentional for consistency with the microsecond constructors. - Extend the catalyst IntegralType coverage with TINYINT (Byte) and SMALLINT (Short) literals so every integral width exercises the (long) codegen cast. 
--- .../sql/catalyst/expressions/datetimeExpressions.scala | 6 ++++++ .../sql/catalyst/expressions/DateExpressionsSuite.scala | 7 +++++-- .../spark/sql/TimestampNanosFunctionsSuiteBase.scala | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 80505f77125f..3f773e5bb6dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -792,6 +792,12 @@ case class NanosToTimestamp(child: Expression) // behavior of `floorDiv`/`floorMod`). When `epochMicros` overflows 64 bits -- i.e. the input is // outside the representable timestamp range -- `longValueExact` throws, which is surfaced as a // DATETIME_OVERFLOW error. + // + // Like the sibling `timestamp_micros`/`timestamp_millis`/`timestamp_seconds` constructors, the + // result is not validated against the [0001, 9999] calendar range: only the 64-bit `epochMicros` + // boundary is guarded, so a count whose `epochMicros` still fits in a long but lands past year + // 9999 (up to the long-micros maximum, ~year 294247) yields an out-of-range value rather than an + // error. This is intentional, keeping the nanosecond constructor consistent with its micro peers. 
override def nullSafeEval(input: Any): Any = { val n = child.dataType match { case _: DecimalType => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a23bb4fc723b..d6b18a9370e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1756,11 +1756,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789)) // An integral argument is accepted directly (widened to BigInteger), exercising the - // IntegralType eval/codegen path rather than the DECIMAL one. + // IntegralType eval/codegen path rather than the DECIMAL one. Cover every integral width + // (TINYINT/SMALLINT/INT/BIGINT) so the `(long)` codegen cast is checked for each. + checkEvaluation(NanosToTimestamp(Literal(2.toByte)), nanosVal(0L, 2)) + checkEvaluation(NanosToTimestamp(Literal(1000.toShort)), nanosVal(1L, 0)) + checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0)) checkEvaluation( NanosToTimestamp(Literal(1230219000123456789L)), nanosVal(1230219000123456L, 789)) checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999)) - checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0)) // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala index b94aa4646667..ab830da6b2ab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala @@ -494,7 +494,8 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession { checkAnswer(sqlRes, Row(instant)) assert(sqlRes.schema.head.dataType === TimestampLTZNanosType(9)) - // A BIGINT argument is implicitly cast to DECIMAL, so the integral literal works directly. + // A BIGINT argument is accepted directly through the dedicated IntegralType path (widened to + // BigInteger, no DECIMAL coercion), so the integral literal works without a cast. checkAnswer(spark.sql(s"SELECT timestamp_nanos(${nanos}L)"), Row(instant)) // DECIMAL input reaches the full [0001, 9999] calendar range, beyond a 64-bit BIGINT of nanos.