From 8b14b7175f8ce1e0d21761af4c6853b20e44c233 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 19 Jun 2026 17:54:26 +0200
Subject: [PATCH 1/4] [SPARK-57526][SQL] Add the `timestamp_nanos` function to
 create nanosecond-precision timestamps from numeric nanoseconds

### What changes were proposed in this pull request?
This PR adds a new built-in function `timestamp_nanos(expr)` that interprets `expr` as the number of nanoseconds since `1970-01-01 00:00:00 UTC` and returns a nanosecond-precision `TIMESTAMP_LTZ(9)`.

Concretely:
- Adds a `NanosToTimestamp` expression in `datetimeExpressions.scala`. It declares a single `DECIMAL` input type with `ImplicitCastInputTypes`, so integral arguments are coerced to their natural decimal automatically while `DECIMAL` arguments are accepted as-is.
- Maps the nanosecond count `N` to the internal `(epochMicros, nanosWithinMicro)` pair with floor semantics (`epochMicros = floorDiv(N, 1000)`, `nanosWithinMicro = floorMod(N, 1000)`, always in `[0, 999]`), computed via `BigInteger` in both the interpreted (`eval`) and codegen (`doGenCode`) paths. `longValueExact` throws `ArithmeticException` when the value is outside the representable timestamp range.
- A `DECIMAL` input (rather than `BIGINT`) is required to reach the full `[0001, 9999]` calendar range: the nanosecond count for year 9999 (~2.5e20) overflows a 64-bit `BIGINT`, which is the same reason the inverse `unix_nanos` returns `DECIMAL(21, 0)`.
- Registers `timestamp_nanos` in `FunctionRegistry` and adds the Scala `functions.timestamp_nanos`.
- Adds catalyst unit tests (interpreted + codegen, full-range and round-trip with `unix_nanos`, overflow), Scala/SQL end-to-end tests, and SQL golden-file coverage.

Scope notes: the PySpark API (classic and Spark Connect Python) and R are out of scope here and tracked as follow-ups; `timestamp_nanos` is recorded in the PySpark function-parity allowlist in the meantime. The Scala Spark Connect client picks up `timestamp_nanos` automatically because `functions.scala` lives in the shared `sql/api` module.

### Why are the changes needed?
Part of the [SPARK-56822](https://issues.apache.org/jira/browse/SPARK-56822) umbrella (timestamps with nanosecond precision). Spark has `timestamp_seconds` / `timestamp_millis` / `timestamp_micros` but no nanosecond counterpart; `timestamp_nanos` fills that gap and is the natural inverse of `unix_nanos`.

### Does this PR introduce _any_ user-facing change?
Yes. A new `timestamp_nanos(expr)` function is available in SQL and the Scala API (including the Scala Spark Connect client). It returns `TIMESTAMP_LTZ(9)`. This is a change only within the unreleased nanosecond-timestamp preview.

Example:

```sql
SELECT timestamp_nanos(1230219000123456789);
-- 2008-12-25 07:30:00.123456789  (rendered in a UTC-08:00 session time zone; the instant is 2008-12-25 15:30:00.123456789 UTC)
```

### How was this patch tested?
- `build/sbt 'catalyst/testOnly org.apache.spark.sql.catalyst.expressions.DateExpressionsSuite'`
- `build/sbt 'sql/testOnly org.apache.spark.sql.TimestampNanosFunctionsAnsiOnSuite org.apache.spark.sql.TimestampNanosFunctionsAnsiOffSuite'`
- `build/sbt 'sql/testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite org.apache.spark.sql.ExpressionsSchemaSuite'`
- `SPARK_GENERATE_GOLDEN_FILES=1 build/sbt 'sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z "nanos"'`
- `./dev/scalastyle`

### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor
---
 python/pyspark/sql/tests/test_functions.py    |  1 +
 .../org/apache/spark/sql/functions.scala      |  9 +++
 .../catalyst/analysis/FunctionRegistry.scala  |  1 +
 .../expressions/datetimeExpressions.scala     | 60 +++++++++++++++++++
 .../expressions/DateExpressionsSuite.scala    | 40 +++++++++++++
 .../sql-functions/sql-expression-schema.md    |  1 +
 .../timestamp-ltz-nanos.sql.out               | 28 +++++++++
 .../sql-tests/inputs/timestamp-ltz-nanos.sql  | 10 ++++
 .../results/timestamp-ltz-nanos.sql.out       | 32 ++++++++++
 .../TimestampNanosFunctionsSuiteBase.scala    | 35 +++++++++++
 10 files changed, 217 insertions(+)

diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 10aa01e5a600..c9ca0fca96a7 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -84,6 +84,7 @@ def test_function_parity(self):
         # Functions that we expect to be missing in python until they are added to pyspark
         expected_missing_in_py = {
             "unix_nanos",  # SPARK-57527: PySpark support tracked as a follow-up
+            "timestamp_nanos",  # SPARK-57526: PySpark support tracked as a follow-up
         }
 
         self.assertEqual(
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 76748f0ae942..8aea50291cdc 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -8569,6 +8569,15 @@ object functions {
    */
   def timestamp_micros(e: Column): Column = Column.fn("timestamp_micros", e)
 
+  /**
+   * Creates a timestamp with the local time zone and nanosecond precision (TIMESTAMP_LTZ(9)) from
+   * the number of nanoseconds since UTC epoch.
+   *
+   * @group datetime_funcs
+   * @since 4.3.0
+   */
+  def timestamp_nanos(e: Column): Column = Column.fn("timestamp_nanos", e)
+
   /**
    * Gets the difference between the timestamps in the specified units by truncating the fraction
    * part.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 2c47fca543a9..415a842c9bf4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -774,6 +774,7 @@ object FunctionRegistry {
     expression[SecondsToTimestamp]("timestamp_seconds"),
     expression[MillisToTimestamp]("timestamp_millis"),
     expression[MicrosToTimestamp]("timestamp_micros"),
+    expression[NanosToTimestamp]("timestamp_nanos"),
     expression[UnixSeconds]("unix_seconds"),
     expression[UnixMillis]("unix_millis"),
     expression[UnixMicros]("unix_micros"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 3fbef82ef246..48d54cff8cba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -759,6 +759,66 @@ case class MicrosToTimestamp(child: Expression)
     copy(child = newChild)
 }
 
+// scalastyle:off line.size.limit line.contains.tab
+@ExpressionDescription(
+  usage = "_FUNC_(nanoseconds) - Creates timestamp with the local time zone and nanosecond precision (TIMESTAMP_LTZ(9)) from the number of nanoseconds since UTC epoch.",
+  examples = """
+    Examples:
+      > SET spark.sql.timestampNanosTypes.enabled=true;
+      spark.sql.timestampNanosTypes.enabled	true
+      > SELECT _FUNC_(1230219000123456789);
+       2008-12-25 07:30:00.123456789
+  """,
+  group = "datetime_funcs",
+  since = "4.3.0")
+// scalastyle:on line.size.limit line.contains.tab
+case class NanosToTimestamp(child: Expression)
+  extends UnaryExpression with ImplicitCastInputTypes {
+  override def nullIntolerant: Boolean = true
+
+  // A nanosecond count needs DECIMAL to span the full [0001, 9999] calendar range: nanos for year
+  // 9999 (~2.5e20) overflows a 64-bit BIGINT, the same reason the inverse `unix_nanos` returns
+  // DECIMAL(21, 0). ImplicitCastInputTypes coerces integral arguments to their natural decimal, so
+  // an ordinary BIGINT argument still works while DECIMAL literals reach the whole range.
+  override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType)
+
+  override def dataType: DataType = TimestampLTZNanosType(9)
+
+  // Maps the integer nanosecond count to the (epochMicros, nanosWithinMicro) pair with floor
+  // semantics, so the sub-microsecond remainder is always in [0, 999] (matching the negative-input
+  // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64
+  // bits, i.e. the input is outside the representable timestamp range.
+  override def nullSafeEval(input: Any): Any = {
+    val n = input.asInstanceOf[Decimal].toJavaBigDecimal
+      .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger
+    val thousand = BigInteger.valueOf(NANOS_PER_MICROS)
+    val rem = n.mod(thousand)
+    val micros = n.subtract(rem).divide(thousand).longValueExact()
+    TimestampNanosVal.fromParts(micros, rem.shortValueExact())
+  }
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, c => {
+      val n = ctx.freshName("nanos")
+      val thousand = ctx.freshName("thousand")
+      val rem = ctx.freshName("rem")
+      s"""
+         |java.math.BigInteger $n = $c.toJavaBigDecimal()
+         |  .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger();
+         |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L);
+         |java.math.BigInteger $rem = $n.mod($thousand);
+         |${ev.value} = org.apache.spark.unsafe.types.TimestampNanosVal.fromParts(
+         |  $n.subtract($rem).divide($thousand).longValueExact(), $rem.shortValueExact());
+         |""".stripMargin
+    })
+  }
+
+  override def prettyName: String = "timestamp_nanos"
+
+  override protected def withNewChildInternal(newChild: Expression): NanosToTimestamp =
+    copy(child = newChild)
+}
+
 abstract class TimestampToLongBase extends UnaryExpression
   with ExpectsInputTypes {
   override def nullIntolerant: Boolean = true
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 8771123ad120..6e0c73fb9ae3 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1743,6 +1743,46 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     }
   }
 
+  test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from nanoseconds") {
+    import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+    // The child is a DECIMAL after analysis (ImplicitCastInputTypes coerces integral arguments);
+    // build the post-coercion literal directly. A wide DECIMAL(38, 0) holds every input below.
+    def tsNanos(n: BigInt): NanosToTimestamp =
+      NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0), DecimalType(38, 0)))
+
+    assert(tsNanos(0).dataType === TimestampLTZNanosType(9))
+
+    // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros + 789 ns.
+    checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789))
+
+    // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]:
+    // -1 ns floors to epochMicros = -1 with a 999 ns remainder.
+    checkEvaluation(tsNanos(BigInt(-1)), nanosVal(-1L, 999))
+    checkEvaluation(tsNanos(BigInt(-1000)), nanosVal(-1L, 0))
+    checkEvaluation(tsNanos(BigInt(-1500)), nanosVal(-2L, 500))
+
+    // NULL input.
+    checkEvaluation(
+      NanosToTimestamp(Literal.create(null, DecimalType(38, 0))), null)
+
+    // Full [0001, 9999] range: a DECIMAL nanosecond count far beyond a 64-bit BIGINT decodes
+    // losslessly back to the original value (proving the function spans the whole calendar range).
+    Seq(
+      localDateTimeToNanosVal(timestampNTZ(9999, 12, 31, 23, 59, 59, 999999999)),
+      localDateTimeToNanosVal(timestampNTZ(1, 1, 1, 0, 0, 0, 1))
+    ).foreach { v =>
+      val n = BigInt(v.epochMicros) * NANOS_PER_MICROS + v.nanosWithinMicro.toInt
+      checkEvaluation(tsNanos(n), v)
+      // Round-trips with the inverse unix_nanos for the same full-range values.
+      checkEvaluation(UnixNanos(tsNanos(n)), Decimal(BigDecimal(n), 21, 0))
+    }
+
+    // Out-of-range input: epochMicros overflows a 64-bit long, so longValueExact throws.
+    checkExceptionInExpression[ArithmeticException](
+      tsNanos(BigInt("10000000000000000000000000")), "out of long range")
+  }
+
   test("TIMESTAMP_SECONDS") {
     def testIntegralFunc(value: Number): Unit = {
       checkEvaluation(
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 3ff81b7f57f0..6297aece4cbb 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -255,6 +255,7 @@
 | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct<hash(Spark, array(123), 2):int> |
 | org.apache.spark.sql.catalyst.expressions.NTile | ntile | SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,ntile(2) OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int> |
 | org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct<nanvl(CAST(NaN AS DOUBLE), 123):double> |
+| org.apache.spark.sql.catalyst.expressions.NanosToTimestamp | timestamp_nanos | SELECT timestamp_nanos(1230219000123456789) | struct<timestamp_nanos(1230219000123456789):timestamp_ltz(9)> |
 | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct<next_day(2015-01-14, TU):date> |
 | org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> |
 | org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> |
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
index a4dadf760088..d8e4fd5e7dce 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
@@ -762,3 +762,31 @@ SELECT unix_nanos(NULL :: timestamp_ltz(9))
 -- !query analysis
 Project [unix_nanos(cast(null as timestamp_ltz(9))) AS unix_nanos(CAST(NULL AS TIMESTAMP_LTZ(9)))#x]
 +- OneRowRelation
+
+
+-- !query
+SELECT timestamp_nanos(1230219000123456789)
+-- !query analysis
+Project [timestamp_nanos(cast(1230219000123456789 as decimal(20,0))) AS timestamp_nanos(1230219000123456789)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT timestamp_nanos(-1)
+-- !query analysis
+Project [timestamp_nanos(cast(-1 as decimal(10,0))) AS timestamp_nanos(-1)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT timestamp_nanos(253402300799999999999BD)
+-- !query analysis
+Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(253402300799999999999)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT timestamp_nanos(CAST(NULL AS BIGINT))
+-- !query analysis
+Project [timestamp_nanos(cast(cast(null as bigint) as decimal(20,0))) AS timestamp_nanos(CAST(NULL AS BIGINT))#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
index e208704196ba..b8dc5c47eb56 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
@@ -215,3 +215,13 @@ SELECT unix_nanos(TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999 UTC');
 SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC');
 -- NULL nanosecond timestamp.
 SELECT unix_nanos(NULL :: timestamp_ltz(9));
+
+-- SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from a nanosecond count since the epoch.
+-- Integral arguments are implicitly cast to DECIMAL; the LTZ result renders in the session zone.
+SELECT timestamp_nanos(1230219000123456789);
+-- Negative input floors toward the past, so the sub-microsecond remainder stays in [0, 999].
+SELECT timestamp_nanos(-1);
+-- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20).
+SELECT timestamp_nanos(253402300799999999999BD);
+-- NULL input.
+SELECT timestamp_nanos(CAST(NULL AS BIGINT));
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
index 1f75f01da848..19fda1102997 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
@@ -854,3 +854,35 @@ SELECT unix_nanos(NULL :: timestamp_ltz(9))
 struct<unix_nanos(CAST(NULL AS TIMESTAMP_LTZ(9))):decimal(21,0)>
 -- !query output
 NULL
+
+
+-- !query
+SELECT timestamp_nanos(1230219000123456789)
+-- !query schema
+struct<timestamp_nanos(1230219000123456789):timestamp_ltz(9)>
+-- !query output
+2008-12-25 07:30:00.123456789
+
+
+-- !query
+SELECT timestamp_nanos(-1)
+-- !query schema
+struct<timestamp_nanos(-1):timestamp_ltz(9)>
+-- !query output
+1969-12-31 15:59:59.999999999
+
+
+-- !query
+SELECT timestamp_nanos(253402300799999999999BD)
+-- !query schema
+struct<timestamp_nanos(253402300799999999999):timestamp_ltz(9)>
+-- !query output
+9999-12-31 15:59:59.999999999
+
+
+-- !query
+SELECT timestamp_nanos(CAST(NULL AS BIGINT))
+-- !query schema
+struct<timestamp_nanos(CAST(NULL AS BIGINT)):timestamp_ltz(9)>
+-- !query output
+NULL
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
index da2e9d3a8d88..b94aa4646667 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
@@ -481,6 +481,41 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession {
       checkAnswer(ltz.select(unix_nanos(col("c"))), Row(null))
     }
   }
+
+  test("SPARK-57526: timestamp_nanos builds nanosecond-precision TIMESTAMP_LTZ values") {
+    // 1230219000123456789 ns since the epoch -> 2008-12-25 15:30:00.123456789 UTC. The result is a
+    // TIMESTAMP_LTZ(9); collecting it yields the absolute Instant regardless of the session zone.
+    val nanos = 1230219000123456789L
+    val instant = Instant.parse("2008-12-25T15:30:00.123456789Z")
+    val sqlRes = spark.sql(s"SELECT timestamp_nanos($nanos)")
+    val colRes = spark.range(1).select(timestamp_nanos(lit(nanos)))
+    // The SQL and Scala Column API agree, return the expected instant, and keep the LTZ(9) type.
+    checkAnswer(sqlRes, colRes)
+    checkAnswer(sqlRes, Row(instant))
+    assert(sqlRes.schema.head.dataType === TimestampLTZNanosType(9))
+
+    // A BIGINT argument is implicitly cast to DECIMAL, so the integral literal works directly.
+    checkAnswer(spark.sql(s"SELECT timestamp_nanos(${nanos}L)"), Row(instant))
+
+    // DECIMAL input reaches the full [0001, 9999] calendar range, beyond a 64-bit BIGINT of nanos.
+    Seq(
+      Instant.parse("9999-12-31T23:59:59.999999999Z"),
+      Instant.parse("0001-01-01T00:00:00.000000001Z")
+    ).foreach { i =>
+      val n = BigInt(i.getEpochSecond) * 1000000000L + i.getNano
+      checkAnswer(
+        spark.range(1).select(timestamp_nanos(lit(BigDecimal(n).bigDecimal))),
+        Row(i))
+    }
+  }
+
+  test("SPARK-57526: timestamp_nanos over NULL input") {
+    val df = spark.createDataFrame(
+      spark.sparkContext.parallelize(Seq(Row(null))),
+      new StructType().add("n", LongType))
+    checkAnswer(df.select(timestamp_nanos(col("n"))), Row(null))
+    checkAnswer(df.selectExpr("timestamp_nanos(n)"), Row(null))
+  }
 }
 
 // Runs the nanosecond timestamp function tests with ANSI mode enabled explicitly.

From e56f3b63a4d750e5a80d708be0a31806c7799836 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 19 Jun 2026 18:45:00 +0200
Subject: [PATCH 2/4] [SPARK-57526][SQL] Reject FLOAT/DOUBLE/STRING from
 timestamp_nanos at analysis

`NanosToTimestamp` declared `inputTypes = Seq(DecimalType)` with
`ImplicitCastInputTypes`, which silently coerced FLOAT, DOUBLE and STRING to
DECIMAL(14,7), DECIMAL(30,15) and DECIMAL(38,18) respectively. The FLOAT and
DOUBLE targets hold only 7 and 15 integer digits, far fewer than the 19 digits
of a present-day nanosecond count, so a FLOAT/DOUBLE argument of realistic
magnitude overflowed the coerced decimal and yielded NULL (ANSI off) or an
overflow error (ANSI on) instead of a timestamp -- contrary to the documented
"accepted and floored" behavior.

Switch to `ExpectsInputTypes` with `Seq(TypeCollection(IntegralType,
DecimalType))` so only integral and DECIMAL nanosecond counts are accepted;
FLOAT/DOUBLE/STRING now fail at analysis with a clear DATATYPE_MISMATCH,
matching the "count of time units" semantics of timestamp_micros/millis. The
interpreted and codegen paths widen an integral argument to BigInteger directly
and keep the DECIMAL floor path unchanged. Add catalyst coverage for the
integral path and the FLOAT/DOUBLE/STRING rejection, a SQL rejection case, and
regenerate the golden files.

Co-authored-by: Isaac
---
 .../expressions/datetimeExpressions.scala     | 33 +++++++++++++------
 .../expressions/DateExpressionsSuite.scala    | 16 +++++++--
 .../timestamp-ltz-nanos.sql.out               | 30 +++++++++++++++--
 .../sql-tests/inputs/timestamp-ltz-nanos.sql  |  4 ++-
 .../results/timestamp-ltz-nanos.sql.out       | 26 +++++++++++++++
 5 files changed, 93 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 48d54cff8cba..f097783f42d5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -773,14 +773,17 @@ case class MicrosToTimestamp(child: Expression)
   since = "4.3.0")
 // scalastyle:on line.size.limit line.contains.tab
 case class NanosToTimestamp(child: Expression)
-  extends UnaryExpression with ImplicitCastInputTypes {
+  extends UnaryExpression with ExpectsInputTypes {
   override def nullIntolerant: Boolean = true
 
-  // A nanosecond count needs DECIMAL to span the full [0001, 9999] calendar range: nanos for year
-  // 9999 (~2.5e20) overflows a 64-bit BIGINT, the same reason the inverse `unix_nanos` returns
-  // DECIMAL(21, 0). ImplicitCastInputTypes coerces integral arguments to their natural decimal, so
-  // an ordinary BIGINT argument still works while DECIMAL literals reach the whole range.
-  override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType)
+  // Accepts an integral or DECIMAL nanosecond count only. DECIMAL is required to span the full
+  // [0001, 9999] calendar range: nanos for year 9999 (~2.5e20) overflow a 64-bit BIGINT, the same
+  // reason the inverse `unix_nanos` returns DECIMAL(21, 0); an integral argument is widened to
+  // BigInteger directly. FLOAT/DOUBLE/STRING are intentionally rejected at analysis rather than
+  // implicitly coerced: a fractional or string nanosecond count is not meaningful, and the implicit
+  // DECIMAL coercion (FLOAT -> DECIMAL(14, 7), DOUBLE -> DECIMAL(30, 15)) would silently overflow
+  // for realistic magnitudes.
+  override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(IntegralType, DecimalType))
 
   override def dataType: DataType = TimestampLTZNanosType(9)
 
@@ -789,8 +792,13 @@ case class NanosToTimestamp(child: Expression)
   // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64
   // bits, i.e. the input is outside the representable timestamp range.
   override def nullSafeEval(input: Any): Any = {
-    val n = input.asInstanceOf[Decimal].toJavaBigDecimal
-      .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger
+    val n = child.dataType match {
+      case _: DecimalType =>
+        input.asInstanceOf[Decimal].toJavaBigDecimal
+          .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger
+      case _: IntegralType =>
+        BigInteger.valueOf(input.asInstanceOf[Number].longValue())
+    }
     val thousand = BigInteger.valueOf(NANOS_PER_MICROS)
     val rem = n.mod(thousand)
     val micros = n.subtract(rem).divide(thousand).longValueExact()
@@ -802,9 +810,14 @@ case class NanosToTimestamp(child: Expression)
       val n = ctx.freshName("nanos")
       val thousand = ctx.freshName("thousand")
       val rem = ctx.freshName("rem")
+      val toBigInteger = child.dataType match {
+        case _: DecimalType =>
+          s"$c.toJavaBigDecimal().setScale(0, java.math.RoundingMode.FLOOR).toBigInteger()"
+        case _: IntegralType =>
+          s"java.math.BigInteger.valueOf((long) $c)"
+      }
       s"""
-         |java.math.BigInteger $n = $c.toJavaBigDecimal()
-         |  .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger();
+         |java.math.BigInteger $n = $toBigInteger;
          |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L);
          |java.math.BigInteger $rem = $n.mod($thousand);
          |${ev.value} = org.apache.spark.unsafe.types.TimestampNanosVal.fromParts(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 6e0c73fb9ae3..0e75c2eab81d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1746,8 +1746,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from nanoseconds") {
     import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
 
-    // The child is a DECIMAL after analysis (ImplicitCastInputTypes coerces integral arguments);
-    // build the post-coercion literal directly. A wide DECIMAL(38, 0) holds every input below.
+    // DECIMAL input is accepted as-is; a wide DECIMAL(38, 0) holds every input below.
     def tsNanos(n: BigInt): NanosToTimestamp =
       NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0), DecimalType(38, 0)))
 
@@ -1756,6 +1755,19 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros + 789 ns.
     checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789))
 
+    // An integral argument is accepted directly (widened to BigInteger), exercising the
+    // IntegralType eval/codegen path rather than the DECIMAL one.
+    checkEvaluation(
+      NanosToTimestamp(Literal(1230219000123456789L)), nanosVal(1230219000123456L, 789))
+    checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999))
+    checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0))
+
+    // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not
+    // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values.
+    Seq(Literal(1.0f), Literal(1.0d), Literal("1")).foreach { lit =>
+      assert(NanosToTimestamp(lit).checkInputDataTypes().isFailure)
+    }
+
     // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]:
     // -1 ns floors to epochMicros = -1 with a 999 ns remainder.
     checkEvaluation(tsNanos(BigInt(-1)), nanosVal(-1L, 999))
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
index d8e4fd5e7dce..9bfec7b8b6e1 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
@@ -767,14 +767,14 @@ Project [unix_nanos(cast(null as timestamp_ltz(9))) AS unix_nanos(CAST(NULL AS T
 -- !query
 SELECT timestamp_nanos(1230219000123456789)
 -- !query analysis
-Project [timestamp_nanos(cast(1230219000123456789 as decimal(20,0))) AS timestamp_nanos(1230219000123456789)#x]
+Project [timestamp_nanos(1230219000123456789) AS timestamp_nanos(1230219000123456789)#x]
 +- OneRowRelation
 
 
 -- !query
 SELECT timestamp_nanos(-1)
 -- !query analysis
-Project [timestamp_nanos(cast(-1 as decimal(10,0))) AS timestamp_nanos(-1)#x]
+Project [timestamp_nanos(-1) AS timestamp_nanos(-1)#x]
 +- OneRowRelation
 
 
@@ -785,8 +785,32 @@ Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(2534023007999
 +- OneRowRelation
 
 
+-- !query
+SELECT timestamp_nanos(1.0D)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"1.0\"",
+    "inputType" : "\"DOUBLE\"",
+    "paramIndex" : "first",
+    "requiredType" : "(\"INTEGRAL\" or \"DECIMAL\")",
+    "sqlExpr" : "\"timestamp_nanos(1.0)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 28,
+    "fragment" : "timestamp_nanos(1.0D)"
+  } ]
+}
+
+
 -- !query
 SELECT timestamp_nanos(CAST(NULL AS BIGINT))
 -- !query analysis
-Project [timestamp_nanos(cast(cast(null as bigint) as decimal(20,0))) AS timestamp_nanos(CAST(NULL AS BIGINT))#x]
+Project [timestamp_nanos(cast(null as bigint)) AS timestamp_nanos(CAST(NULL AS BIGINT))#x]
 +- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
index b8dc5c47eb56..6d7c8ed3f65d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
@@ -217,11 +217,13 @@ SELECT unix_nanos(TIMESTAMP_LTZ '1960-01-01 00:00:00.000000001 UTC');
 SELECT unix_nanos(NULL :: timestamp_ltz(9));
 
 -- SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from a nanosecond count since the epoch.
--- Integral arguments are implicitly cast to DECIMAL; the LTZ result renders in the session zone.
+-- An integral argument is accepted directly; the LTZ result renders in the session zone.
 SELECT timestamp_nanos(1230219000123456789);
 -- Negative input floors toward the past, so the sub-microsecond remainder stays in [0, 999].
 SELECT timestamp_nanos(-1);
 -- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20).
 SELECT timestamp_nanos(253402300799999999999BD);
+-- DOUBLE is rejected at analysis: only integral and DECIMAL nanosecond counts are accepted.
+SELECT timestamp_nanos(1.0D);
 -- NULL input.
 SELECT timestamp_nanos(CAST(NULL AS BIGINT));
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
index 19fda1102997..dbb732a33ef5 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
@@ -880,6 +880,32 @@ struct<timestamp_nanos(253402300799999999999):timestamp_ltz(9)>
 9999-12-31 15:59:59.999999999
 
 
+-- !query
+SELECT timestamp_nanos(1.0D)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "inputSql" : "\"1.0\"",
+    "inputType" : "\"DOUBLE\"",
+    "paramIndex" : "first",
+    "requiredType" : "(\"INTEGRAL\" or \"DECIMAL\")",
+    "sqlExpr" : "\"timestamp_nanos(1.0)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 28,
+    "fragment" : "timestamp_nanos(1.0D)"
+  } ]
+}
+
+
 -- !query
 SELECT timestamp_nanos(CAST(NULL AS BIGINT))
 -- !query schema

From 0a83affa7b22beb2f122dc292d54001e13599374 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Fri, 19 Jun 2026 19:01:54 +0200
Subject: [PATCH 3/4] [SPARK-57526][SQL] Raise DATETIME_OVERFLOW for
 timestamp_nanos overflow and add negative tests

`NanosToTimestamp` let `BigInteger.longValueExact()` throw a raw
`java.lang.ArithmeticException` when `epochMicros` overflows a 64-bit long.
Surface it instead as a proper Spark error condition: add
`QueryExecutionErrors.timestampNanosOverflowError`, which builds a
`SparkArithmeticException` with the `DATETIME_OVERFLOW` condition (SQLSTATE
22008), and catch the `ArithmeticException` and throw that error instead in
both the interpreted and codegen paths.

Strengthen the negative coverage: the catalyst FLOAT/DOUBLE/STRING rejection
test now asserts a `DataTypeMismatch` with the `UNEXPECTED_INPUT_TYPE`
sub-class (not just `isFailure`), the overflow test asserts the
`DATETIME_OVERFLOW` condition via `checkErrorInExpression`, and a SQL golden
case exercises the runtime overflow end-to-end. Regenerate the golden files.

Co-authored-by: Isaac
---
 .../expressions/datetimeExpressions.scala     | 21 +++++++++++++++----
 .../sql/errors/QueryExecutionErrors.scala     | 10 +++++++++
 .../expressions/DateExpressionsSuite.scala    | 12 +++++++----
 .../timestamp-ltz-nanos.sql.out               |  7 +++++++
 .../sql-tests/inputs/timestamp-ltz-nanos.sql  |  2 ++
 .../results/timestamp-ltz-nanos.sql.out       | 15 +++++++++++++
 6 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index f097783f42d5..80505f77125f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -789,8 +789,9 @@ case class NanosToTimestamp(child: Expression)
 
   // Maps the integer nanosecond count to the (epochMicros, nanosWithinMicro) pair with floor
   // semantics, so the sub-microsecond remainder is always in [0, 999] (matching the negative-input
-  // behavior of `floorDiv`/`floorMod`). `longValueExact` throws when `epochMicros` overflows 64
-  // bits, i.e. the input is outside the representable timestamp range.
+  // behavior of `floorDiv`/`floorMod`). When `epochMicros` overflows 64 bits -- i.e. the input is
+  // outside the representable timestamp range -- `longValueExact` throws, which is surfaced as a
+  // DATETIME_OVERFLOW error.
   override def nullSafeEval(input: Any): Any = {
     val n = child.dataType match {
       case _: DecimalType =>
@@ -801,7 +802,11 @@ case class NanosToTimestamp(child: Expression)
     }
     val thousand = BigInteger.valueOf(NANOS_PER_MICROS)
     val rem = n.mod(thousand)
-    val micros = n.subtract(rem).divide(thousand).longValueExact()
+    val micros = try {
+      n.subtract(rem).divide(thousand).longValueExact()
+    } catch {
+      case _: ArithmeticException => throw QueryExecutionErrors.timestampNanosOverflowError(n)
+    }
     TimestampNanosVal.fromParts(micros, rem.shortValueExact())
   }
 
@@ -810,18 +815,26 @@ case class NanosToTimestamp(child: Expression)
       val n = ctx.freshName("nanos")
       val thousand = ctx.freshName("thousand")
       val rem = ctx.freshName("rem")
+      val micros = ctx.freshName("micros")
       val toBigInteger = child.dataType match {
         case _: DecimalType =>
           s"$c.toJavaBigDecimal().setScale(0, java.math.RoundingMode.FLOOR).toBigInteger()"
         case _: IntegralType =>
           s"java.math.BigInteger.valueOf((long) $c)"
       }
+      val errors = QueryExecutionErrors.getClass.getName.stripSuffix("$")
       s"""
          |java.math.BigInteger $n = $toBigInteger;
          |java.math.BigInteger $thousand = java.math.BigInteger.valueOf(${NANOS_PER_MICROS}L);
          |java.math.BigInteger $rem = $n.mod($thousand);
+         |long $micros;
+         |try {
+         |  $micros = $n.subtract($rem).divide($thousand).longValueExact();
+         |} catch (java.lang.ArithmeticException e) {
+         |  throw $errors.timestampNanosOverflowError($n);
+         |}
          |${ev.value} = org.apache.spark.unsafe.types.TimestampNanosVal.fromParts(
-         |  $n.subtract($rem).divide($thousand).longValueExact(), $rem.shortValueExact());
+         |  $micros, $rem.shortValueExact());
          |""".stripMargin
     })
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 48c3ef0c6a93..f4db9c9041f2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2646,6 +2646,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       summary = "")
   }
 
+  def timestampNanosOverflowError(nanos: java.math.BigInteger): SparkArithmeticException = {
+    new SparkArithmeticException(
+      errorClass = "DATETIME_OVERFLOW",
+      messageParameters = Map(
+        "operation" ->
+          s"create a TIMESTAMP_LTZ(9) from $nanos nanoseconds since the epoch"),
+      context = Array.empty,
+      summary = "")
+  }
+
   def timeAddIntervalOverflowError(
       time: Long,
       timePrecision: Int,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 0e75c2eab81d..a23bb4fc723b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1765,7 +1765,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not
     // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values.
     Seq(Literal(1.0f), Literal(1.0d), Literal("1")).foreach { lit =>
-      assert(NanosToTimestamp(lit).checkInputDataTypes().isFailure)
+      val mismatch = NanosToTimestamp(lit).checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+      assert(mismatch.errorSubClass == "UNEXPECTED_INPUT_TYPE")
     }
 
     // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro stays in [0, 999]:
@@ -1790,9 +1791,12 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkEvaluation(UnixNanos(tsNanos(n)), Decimal(BigDecimal(n), 21, 0))
     }
 
-    // Out-of-range input: epochMicros overflows a 64-bit long, so longValueExact throws.
-    checkExceptionInExpression[ArithmeticException](
-      tsNanos(BigInt("10000000000000000000000000")), "out of long range")
+    // Out-of-range input: epochMicros overflows a 64-bit long, surfaced as DATETIME_OVERFLOW.
+    checkErrorInExpression[SparkArithmeticException](
+      tsNanos(BigInt("10000000000000000000000000")),
+      condition = "DATETIME_OVERFLOW",
+      parameters = Map("operation" ->
+        "create a TIMESTAMP_LTZ(9) from 10000000000000000000000000 nanoseconds since the epoch"))
   }
 
   test("TIMESTAMP_SECONDS") {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
index 9bfec7b8b6e1..9c15c197d8e7 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
@@ -785,6 +785,13 @@ Project [timestamp_nanos(253402300799999999999) AS timestamp_nanos(2534023007999
 +- OneRowRelation
 
 
+-- !query
+SELECT timestamp_nanos(10000000000000000000000000BD)
+-- !query analysis
+Project [timestamp_nanos(10000000000000000000000000) AS timestamp_nanos(10000000000000000000000000)#x]
++- OneRowRelation
+
+
 -- !query
 SELECT timestamp_nanos(1.0D)
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
index 6d7c8ed3f65d..bad3c1aee842 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
@@ -223,6 +223,8 @@ SELECT timestamp_nanos(1230219000123456789);
 SELECT timestamp_nanos(-1);
 -- DECIMAL input reaches beyond a 64-bit BIGINT, up to year 9999 (nanos ~ 2.5e20).
 SELECT timestamp_nanos(253402300799999999999BD);
+-- Out-of-range input: epochMicros overflows a 64-bit long, so the conversion fails at runtime.
+SELECT timestamp_nanos(10000000000000000000000000BD);
 -- DOUBLE is rejected at analysis: only integral and DECIMAL nanosecond counts are accepted.
 SELECT timestamp_nanos(1.0D);
 -- NULL input.
diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
index dbb732a33ef5..84987fcb433c 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
@@ -880,6 +880,21 @@ struct<timestamp_nanos(253402300799999999999):timestamp_ltz(9)>
 9999-12-31 15:59:59.999999999
 
 
+-- !query
+SELECT timestamp_nanos(10000000000000000000000000BD)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+{
+  "errorClass" : "DATETIME_OVERFLOW",
+  "sqlState" : "22008",
+  "messageParameters" : {
+    "operation" : "create a TIMESTAMP_LTZ(9) from 10000000000000000000000000 nanoseconds since the epoch"
+  }
+}
+
+
 -- !query
 SELECT timestamp_nanos(1.0D)
 -- !query schema

From e81da360031da79a5e98a8d1f654e9d974efc3b7 Mon Sep 17 00:00:00 2001
From: Max Gekk <max.gekk@gmail.com>
Date: Sat, 20 Jun 2026 09:30:39 +0200
Subject: [PATCH 4/4] [SPARK-57526][SQL] Address review feedback on
 timestamp_nanos

- Fix a stale test comment that still claimed a BIGINT argument is implicitly
  cast to DECIMAL; after the switch to ExpectsInputTypes it goes through the
  dedicated IntegralType path (widened to BigInteger), so the comment is updated
  to match.
- Document that, like timestamp_micros/millis/seconds, NanosToTimestamp does not
  validate the [0001, 9999] calendar range: only the 64-bit epochMicros boundary
  is guarded (counts up to ~year 294247 are accepted), which is intentional for
  consistency with the microsecond constructors.
- Extend the catalyst IntegralType coverage with TINYINT (Byte) and SMALLINT
  (Short) literals so every integral width exercises the (long) codegen cast.
---
 .../sql/catalyst/expressions/datetimeExpressions.scala     | 6 ++++++
 .../sql/catalyst/expressions/DateExpressionsSuite.scala    | 7 +++++--
 .../spark/sql/TimestampNanosFunctionsSuiteBase.scala       | 3 ++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 80505f77125f..3f773e5bb6dc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -792,6 +792,12 @@ case class NanosToTimestamp(child: Expression)
   // behavior of `floorDiv`/`floorMod`). When `epochMicros` overflows 64 bits -- i.e. the input is
   // outside the representable timestamp range -- `longValueExact` throws, which is surfaced as a
   // DATETIME_OVERFLOW error.
+  //
+  // Like the sibling `timestamp_micros`/`timestamp_millis`/`timestamp_seconds` constructors, the
+  // result is not validated against the [0001, 9999] calendar range: only the 64-bit `epochMicros`
+  // boundary is guarded, so a count whose `epochMicros` still fits in a long but lands past year
+  // 9999 (up to the long-micros maximum, ~year 294247) yields an out-of-range value rather than an
+  // error. This is intentional, keeping the nanosecond constructor consistent with its micro peers.
   override def nullSafeEval(input: Any): Any = {
     val n = child.dataType match {
       case _: DecimalType =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index a23bb4fc723b..d6b18a9370e0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1756,11 +1756,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(tsNanos(BigInt("1230219000123456789")), nanosVal(1230219000123456L, 789))
 
     // An integral argument is accepted directly (widened to BigInteger), exercising the
-    // IntegralType eval/codegen path rather than the DECIMAL one.
+    // IntegralType eval/codegen path rather than the DECIMAL one. Cover every integral width
+    // (TINYINT/SMALLINT/INT/BIGINT) so the `(long)` codegen cast is checked for each.
+    checkEvaluation(NanosToTimestamp(Literal(2.toByte)), nanosVal(0L, 2))
+    checkEvaluation(NanosToTimestamp(Literal(1000.toShort)), nanosVal(1L, 0))
+    checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0))
     checkEvaluation(
       NanosToTimestamp(Literal(1230219000123456789L)), nanosVal(1230219000123456L, 789))
     checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999))
-    checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0))
 
     // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string nanosecond count is not
     // meaningful, and the implicit DECIMAL coercion would silently overflow for realistic values.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
index b94aa4646667..ab830da6b2ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala
@@ -494,7 +494,8 @@ abstract class TimestampNanosFunctionsSuiteBase extends SharedSparkSession {
     checkAnswer(sqlRes, Row(instant))
     assert(sqlRes.schema.head.dataType === TimestampLTZNanosType(9))
 
-    // A BIGINT argument is implicitly cast to DECIMAL, so the integral literal works directly.
+    // A BIGINT argument is accepted directly through the dedicated IntegralType path (widened to
+    // BigInteger, no DECIMAL coercion), so the integral literal works without a cast.
     checkAnswer(spark.sql(s"SELECT timestamp_nanos(${nanos}L)"), Row(instant))
 
     // DECIMAL input reaches the full [0001, 9999] calendar range, beyond a 64-bit BIGINT of nanos.