diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala index 0e7d44e98bfb..c70a3fdd62f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.analysis.TypeCoercion.PromoteStrings.conf import org.apache.spark.sql.catalyst.expressions.{ Alias, @@ -82,6 +83,8 @@ import org.apache.spark.sql.types.{ StringType, StringTypeExpression, StructType, + TimestampLTZNanosType, + TimestampNTZNanosType, TimestampNTZType, TimestampType, TimestampTypeExpression, @@ -244,14 +247,58 @@ abstract class TypeCoercionHelper { (d1, d2) match { case (_, _: TimeType) => None case (_: TimeType, _) => None - case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) => - Some(TimestampType) - case (_: TimestampType, _: TimestampNTZType) | (_: TimestampNTZType, _: TimestampType) => - Some(TimestampType) - - case (_: TimestampNTZType, _: DateType) | (_: DateType, _: TimestampNTZType) => - Some(TimestampNTZType) + // The remaining datetime types (DATE and the micro/nanos TIMESTAMP_LTZ / TIMESTAMP_NTZ + // families) widen along two independent axes: + // - time-zone family: the result is LTZ if either input is LTZ-family, otherwise NTZ. This + // mirrors the microsecond precedent where TIMESTAMP + TIMESTAMP_NTZ widens to TIMESTAMP. + // DATE is family-neutral and adopts the family of the other side. + // - precision: the maximum of the two precisions, where the micro types and DATE count as 6 + // and the nanos types contribute their own precision p in [7, 9]. 
+ // The (family, precision) pair then maps back to a concrete type: precision 6 yields the + // micro type, precision in [7, 9] yields the nanos type. + // + // Note: this common-type resolution is intentionally more permissive than the nanosecond + // conversion rules in Cast.canUpCast / Cast.canANSIStoreAssign, which keep cross-family and + // DATE <-> nanos casts explicit-CAST-only while the nanos types are unreleased (SPARK-57323 + // etc.). Coercion here mirrors the microsecond precedent so that UNION / CASE / coalesce / + // IN / comparison resolve a common type the same way they do for the micro families; the + // stricter explicit-only stance is deliberately scoped to up-cast and store assignment, not + // to common-type resolution. + case _ => + // Fractional-seconds precision of the microsecond timestamp types; the nanos types carry + // 7-9. DATE has no time component and is treated as the micro precision so that + // DATE <-> micro widens to the micro type and DATE <-> nanos to the nanos type. + val MicrosPrecision = 6 + def isLtz(d: DatetimeType): Boolean = + d.isInstanceOf[TimestampType] || d.isInstanceOf[TimestampLTZNanosType] + def isNtz(d: DatetimeType): Boolean = + d.isInstanceOf[TimestampNTZType] || d.isInstanceOf[TimestampNTZNanosType] + def precisionOf(d: DatetimeType): Int = d match { + case t: TimestampLTZNanosType => t.precision + case t: TimestampNTZNanosType => t.precision + case _ => MicrosPrecision // DateType / TimestampType / TimestampNTZType + } + // Beyond TimeType (handled above), the only datetime types are DATE and the micro/nanos + // timestamp families. Guard so that a future DatetimeType subtype fails fast here instead + // of being silently mis-widened (treated as a family-neutral precision-6 type and folded + // into DATE) when it should be wired in explicitly. 
+ def isWidenable(d: DatetimeType): Boolean = + isLtz(d) || isNtz(d) || d.isInstanceOf[DateType] + if (!isWidenable(d1) || !isWidenable(d2)) { + throw SparkException.internalError( + s"Unexpected datetime types in findWiderDateTimeType: $d1, $d2") + } else if (!isLtz(d1) && !isNtz(d1) && !isLtz(d2) && !isNtz(d2)) { + // Both sides are DATE; callers short-circuit equal types, so this is just defensive. + Some(DateType) + } else { + val p = math.max(precisionOf(d1), precisionOf(d2)) + if (isLtz(d1) || isLtz(d2)) { + Some(if (p <= MicrosPrecision) TimestampType else TimestampLTZNanosType(p)) + } else { + Some(if (p <= MicrosPrecision) TimestampNTZType else TimestampNTZNanosType(p)) + } + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala index 1f415c5ede44..dbbce0888640 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala @@ -144,6 +144,15 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { Seq(DateType, TimestampType, BinaryType, BooleanType).foreach { dt => widenTest(dt, StringType, Some(dt)) } + + // Nanosecond-precision timestamp types (SPARK-57454). 
+ Seq(7, 8, 9).foreach { p => + widenTest(TimestampLTZNanosType(p), StringType, Some(TimestampLTZNanosType(p))) + widenTest(TimestampNTZNanosType(p), StringType, Some(TimestampNTZNanosType(p))) + } + widenTest(TimestampType, TimestampLTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(TimestampLTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(DateType, TimestampNTZNanosType(7), Some(TimestampNTZNanosType(7))) } test("tightest common bound for types") { @@ -219,6 +228,29 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { widenTest(IntegerType, TimestampType, None) widenTest(StringType, TimestampType, None) + // Nanosecond-precision timestamp types (SPARK-57454). Kept in sync with the same block in + // TypeCoercionSuite, since both findTightestCommonType impls share findWiderDateTimeType. + // nanos(p1) <-> nanos(p2) within the same family widen to the max precision (incl. self-pair). + widenTest(TimestampLTZNanosType(7), TimestampLTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(TimestampLTZNanosType(8), TimestampLTZNanosType(8), Some(TimestampLTZNanosType(8))) + widenTest(TimestampNTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampNTZNanosType(9))) + // micro <-> nanos within the same family widen to the nanos type. + widenTest(TimestampType, TimestampLTZNanosType(7), Some(TimestampLTZNanosType(7))) + widenTest(TimestampNTZType, TimestampNTZNanosType(8), Some(TimestampNTZNanosType(8))) + // Mixed time-zone families widen to the LTZ family (mirrors TIMESTAMP + TIMESTAMP_NTZ). + widenTest(TimestampLTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(TimestampLTZNanosType(7), TimestampNTZType, Some(TimestampLTZNanosType(7))) + widenTest(TimestampType, TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + // DATE <-> nanos widen to the nanos type; DATE adopts the nanos side's time-zone family. 
+ widenTest(DateType, TimestampLTZNanosType(8), Some(TimestampLTZNanosType(8))) + widenTest(DateType, TimestampNTZNanosType(7), Some(TimestampNTZNanosType(7))) + // nanos <-> TIME has no common datetime type. + widenTest(TimestampLTZNanosType(9), TimeType(6), None) + widenTest(TimestampNTZNanosType(9), TimeType(6), None) + // No common type with non-datetime types. + widenTest(IntegerType, TimestampLTZNanosType(9), None) + widenTest(StringType, TimestampNTZNanosType(9), None) + // ComplexType widenTest(NullType, MapType(IntegerType, StringType, false), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index c59b687dc6ed..de0f3207bebc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -652,6 +652,28 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase { widenTest(IntegerType, TimestampType, None) widenTest(StringType, TimestampType, None) + // Nanosecond-precision timestamp types (SPARK-57454). + // nanos(p1) <-> nanos(p2) within the same family widen to the max precision. + widenTest(TimestampLTZNanosType(7), TimestampLTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(TimestampLTZNanosType(8), TimestampLTZNanosType(8), Some(TimestampLTZNanosType(8))) + widenTest(TimestampNTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampNTZNanosType(9))) + // micro <-> nanos within the same family widen to the nanos type. + widenTest(TimestampType, TimestampLTZNanosType(7), Some(TimestampLTZNanosType(7))) + widenTest(TimestampNTZType, TimestampNTZNanosType(8), Some(TimestampNTZNanosType(8))) + // Mixed time-zone families widen to the LTZ family (mirrors TIMESTAMP + TIMESTAMP_NTZ). 
+ widenTest(TimestampLTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTest(TimestampLTZNanosType(7), TimestampNTZType, Some(TimestampLTZNanosType(7))) + widenTest(TimestampType, TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + // DATE <-> nanos widen to the nanos type; DATE adopts the nanos side's time-zone family. + widenTest(DateType, TimestampLTZNanosType(8), Some(TimestampLTZNanosType(8))) + widenTest(DateType, TimestampNTZNanosType(7), Some(TimestampNTZNanosType(7))) + // nanos <-> TIME has no common datetime type. + widenTest(TimestampLTZNanosType(9), TimeType(6), None) + widenTest(TimestampNTZNanosType(9), TimeType(6), None) + // No common type with non-datetime types. + widenTest(IntegerType, TimestampLTZNanosType(9), None) + widenTest(StringType, TimestampNTZNanosType(9), None) + // ComplexType widenTest(NullType, MapType(IntegerType, StringType, false), @@ -962,6 +984,22 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase { new StructType().add("a", StringType), new StructType().add("a", IntegerType), Some(new StructType().add("a", StringType))) + + // Nanosecond-precision timestamp types (SPARK-57454). + widenTestWithStringPromotion( + TimestampType, TimestampLTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTestWithStringPromotion( + TimestampLTZNanosType(7), TimestampNTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTestWithStringPromotion( + DateType, TimestampNTZNanosType(7), Some(TimestampNTZNanosType(7))) + widenTestWithoutStringPromotion( + TimestampType, TimestampLTZNanosType(9), Some(TimestampLTZNanosType(9))) + widenTestWithoutStringPromotion( + ArrayType(TimestampType), ArrayType(TimestampNTZNanosType(8)), + Some(ArrayType(TimestampLTZNanosType(8)))) + // nanos <-> string promotes to string with promotion, no common type without it. 
+ widenTestWithStringPromotion(StringType, TimestampLTZNanosType(9), Some(StringType)) + widenTestWithoutStringPromotion(StringType, TimestampNTZNanosType(9), None) } test("cast NullType for expressions that implement ExpectsInputTypes") { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out index 4ae37b45aa73..21e3c2e4a020 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out @@ -833,3 +833,141 @@ SELECT timestamp_nanos(CAST(NULL AS BIGINT)) -- !query analysis Project [timestamp_nanos(cast(null as bigint)) AS timestamp_nanos(CAST(NULL AS BIGINT))#x] +- OneRowRelation + + +-- !query +SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_LTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999') ORDER BY c +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ltz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ltz(9)) ORDER BY c +-- !query analysis +Sort [c#x ASC NULLS FIRST], true ++- Project [typeof(c#x) AS typeof(c)#x, c#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Union false, false + :- Project [cast(c#x as timestamp_ltz(9)) AS c#x] + : +- Project [cast(1582-10-04 12:30:45.1234567 as timestamp_ltz(7)) AS c#x] + : +- OneRowRelation + +- Project [cast(1582-10-15 23:59:59.123456789 as timestamp_ltz(9)) AS CAST(1582-10-15 23:59:59.123456789 AS TIMESTAMP_LTZ(9))#x] + +- OneRowRelation + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001 Asia/Kolkata' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999 UTC' :: timestamp_ltz(9)) AS v) +-- !query analysis +Project [typeof(v#x) AS typeof(v)#x, v#x] 
++- SubqueryAlias __auto_generated_subquery_name + +- Project [coalesce(cast(cast(1969-12-31 23:59:59.0000001 Asia/Kolkata as timestamp_ltz(7)) as timestamp_ltz(9)), cast(1969-12-31 23:59:59.999999999 UTC as timestamp_ltz(9))) AS v#x] + +- OneRowRelation + + +-- !query +SELECT typeof(v), v FROM (SELECT CASE WHEN true + THEN TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kathmandu' + ELSE '2026-06-21 10:16:30.987654321 UTC' :: timestamp_ltz(9) END AS v) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ltz(8)) AS v) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(greatest(TIMESTAMP_LTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9))) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT greatest(TIMESTAMP_LTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ltz(9), + TIMESTAMP_LTZ '2026-06-21 10:16:30.5') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query analysis +Project [least(cast(cast(1970-01-01 00:00:00.0000001 as timestamp_ltz(7)) as timestamp_ltz(9)), cast(1969-12-31 23:59:59.999999999 as timestamp_ltz(9))) AS least(CAST(1970-01-01 00:00:00.0000001 AS TIMESTAMP_LTZ(7)), CAST(1969-12-31 23:59:59.999999999 AS TIMESTAMP_LTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ltz(7), + TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kolkata', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(array(TIMESTAMP_LTZ '9999-12-31 23:59:59', + '0001-01-01 
00:00:00.000000001' :: timestamp_ltz(9))) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ltz(9), + 'max', TIMESTAMP_LTZ '9999-12-31 23:59:59.999999') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + TIMESTAMP_NTZ '2026-06-21 10:16:30.123456789', + '1970-01-01 00:00:00.000000001 UTC' :: timestamp_ltz(9)) AS v) +-- !query analysis +Project [typeof(v#x) AS typeof(v)#x, v#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [coalesce(cast(2026-06-21 10:16:30.123456789 as timestamp_ltz(9)), cast(1970-01-01 00:00:00.000000001 UTC as timestamp_ltz(9))) AS v#x] + +- OneRowRelation + + +-- !query +SELECT typeof(c) FROM ( + SELECT TIMESTAMP_NTZ '1582-10-15 00:00:00' AS c + UNION ALL SELECT '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query analysis +Project [typeof(c#x) AS typeof(c)#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Union false, false + :- Project [cast(c#x as timestamp_ltz(9)) AS c#x] + : +- Project [1582-10-15 00:00:00 AS c#x] + : +- OneRowRelation + +- Project [cast(9999-12-31 23:59:59.999999999 as timestamp_ltz(9)) AS CAST(9999-12-31 23:59:59.999999999 AS TIMESTAMP_LTZ(9))#x] + +- OneRowRelation + + +-- !query +SELECT typeof(coalesce('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '2026-06-21 10:16:30.123456789 UTC' :: timestamp_ltz(9))) +-- !query analysis +Project [typeof(coalesce(cast(cast(0001-01-01 00:00:00.0000001 as timestamp_ntz(7)) as timestamp_ltz(9)), cast(2026-06-21 10:16:30.123456789 UTC as timestamp_ltz(9)))) AS typeof(coalesce(CAST(0001-01-01 00:00:00.0000001 AS TIMESTAMP_NTZ(7)), CAST(2026-06-21 10:16:30.123456789 UTC AS TIMESTAMP_LTZ(9))))#x] ++- OneRowRelation + + +-- !query +SELECT typeof(CASE WHEN true + THEN '1969-12-31 23:59:59.1234567' :: timestamp_ntz(7) + ELSE '1970-01-01 00:00:00.123456789 UTC' :: 
timestamp_ltz(9) END) +-- !query analysis +Project [typeof(CASE WHEN true THEN cast(cast(1969-12-31 23:59:59.1234567 as timestamp_ntz(7)) as timestamp_ltz(9)) ELSE cast(1970-01-01 00:00:00.123456789 UTC as timestamp_ltz(9)) END) AS typeof(CASE WHEN true THEN CAST(1969-12-31 23:59:59.1234567 AS TIMESTAMP_NTZ(7)) ELSE CAST(1970-01-01 00:00:00.123456789 UTC AS TIMESTAMP_LTZ(9)) END)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out index 9eda12a1e23c..09c18e064757 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out @@ -694,3 +694,114 @@ SELECT unix_nanos(NULL :: timestamp_ntz(9)) -- !query analysis Project [unix_nanos(cast(null as timestamp_ntz(9))) AS unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))#x] +- OneRowRelation + + +-- !query +SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_NTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999') ORDER BY c +-- !query analysis +Sort [c#x ASC NULLS FIRST], true ++- Project [typeof(c#x) AS typeof(c)#x, c#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Union false, false + :- Project [cast(c#x as timestamp_ntz(9)) AS c#x] + : +- Project [0001-01-01 00:00:00 AS c#x] + : +- OneRowRelation + +- Project [9999-12-31 23:59:59.999999999 AS TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999'#x] + +- OneRowRelation + + +-- !query +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ntz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ntz(9)) ORDER BY c +-- !query analysis +Sort [c#x ASC NULLS FIRST], true ++- Project [typeof(c#x) AS typeof(c)#x, c#x] + +- SubqueryAlias __auto_generated_subquery_name + +- Union false, false + :- Project [cast(c#x as timestamp_ntz(9)) AS 
c#x] + : +- Project [cast(1582-10-04 12:30:45.1234567 as timestamp_ntz(7)) AS c#x] + : +- OneRowRelation + +- Project [cast(1582-10-15 23:59:59.123456789 as timestamp_ntz(9)) AS CAST(1582-10-15 23:59:59.123456789 AS TIMESTAMP_NTZ(9))#x] + +- OneRowRelation + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)) AS v) +-- !query analysis +Project [typeof(v#x) AS typeof(v)#x, v#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [coalesce(cast(cast(1969-12-31 23:59:59.0000001 as timestamp_ntz(7)) as timestamp_ntz(9)), cast(1969-12-31 23:59:59.999999999 as timestamp_ntz(9))) AS v#x] + +- OneRowRelation + + +-- !query +SELECT typeof(v), v FROM (SELECT CASE WHEN true + THEN TIMESTAMP_NTZ '2026-06-21 10:16:30' + ELSE '2026-06-21 10:16:30.987654321' :: timestamp_ntz(9) END AS v) +-- !query analysis +Project [typeof(v#x) AS typeof(v)#x, v#x] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [CASE WHEN true THEN cast(2026-06-21 10:16:30 as timestamp_ntz(9)) ELSE cast(2026-06-21 10:16:30.987654321 as timestamp_ntz(9)) END AS v#x] + +- OneRowRelation + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ntz(8)) AS v) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT typeof(greatest(TIMESTAMP_NTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9))) +-- !query analysis +Project [typeof(greatest(cast(0001-01-01 00:00:00 as timestamp_ntz(9)), cast(9999-12-31 23:59:59.999999999 as timestamp_ntz(9)))) AS typeof(greatest(TIMESTAMP_NTZ '0001-01-01 00:00:00', CAST(9999-12-31 23:59:59.999999999 AS TIMESTAMP_NTZ(9))))#x] ++- OneRowRelation + + +-- !query +SELECT greatest(TIMESTAMP_NTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ntz(9), + TIMESTAMP_NTZ '2026-06-21 10:16:30.5') +-- !query 
analysis +Project [greatest(cast(1500-03-01 12:00:00 as timestamp_ntz(9)), cast(1582-10-15 00:00:00.123456789 as timestamp_ntz(9)), cast(2026-06-21 10:16:30.5 as timestamp_ntz(9))) AS greatest(TIMESTAMP_NTZ '1500-03-01 12:00:00', CAST(1582-10-15 00:00:00.123456789 AS TIMESTAMP_NTZ(9)), TIMESTAMP_NTZ '2026-06-21 10:16:30.5')#x] ++- OneRowRelation + + +-- !query +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)) +-- !query analysis +Project [least(cast(cast(1970-01-01 00:00:00.0000001 as timestamp_ntz(7)) as timestamp_ntz(9)), cast(1969-12-31 23:59:59.999999999 as timestamp_ntz(9))) AS least(CAST(1970-01-01 00:00:00.0000001 AS TIMESTAMP_NTZ(7)), CAST(1969-12-31 23:59:59.999999999 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + TIMESTAMP_NTZ '2026-06-21 10:16:30', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9)) +-- !query analysis +Project [array(cast(cast(0001-01-01 00:00:00.0000001 as timestamp_ntz(7)) as timestamp_ntz(9)), cast(2026-06-21 10:16:30 as timestamp_ntz(9)), cast(9999-12-31 23:59:59.999999999 as timestamp_ntz(9))) AS array(CAST(0001-01-01 00:00:00.0000001 AS TIMESTAMP_NTZ(7)), TIMESTAMP_NTZ '2026-06-21 10:16:30', CAST(9999-12-31 23:59:59.999999999 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +SELECT typeof(array(TIMESTAMP_NTZ '9999-12-31 23:59:59', + '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9))) +-- !query analysis +Project [typeof(array(cast(9999-12-31 23:59:59 as timestamp_ntz(9)), cast(0001-01-01 00:00:00.000000001 as timestamp_ntz(9)))) AS typeof(array(TIMESTAMP_NTZ '9999-12-31 23:59:59', CAST(0001-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))))#x] ++- OneRowRelation + + +-- !query +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9), + 'max', TIMESTAMP_NTZ '9999-12-31 23:59:59.999999') +-- !query analysis +Project [map(min, cast(0001-01-01 00:00:00.000000001 
as timestamp_ntz(9)), max, cast(9999-12-31 23:59:59.999999 as timestamp_ntz(9))) AS map(min, CAST(0001-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9)), max, TIMESTAMP_NTZ '9999-12-31 23:59:59.999999')#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql index 146486879fff..061d218c275d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql @@ -238,3 +238,74 @@ SELECT timestamp_nanos(10000000000000000000000000BD); SELECT timestamp_nanos(1.0D); -- NULL input. SELECT timestamp_nanos(CAST(NULL AS BIGINT)); + +-- SPARK-57454: implicit type coercion / widening over nanosecond TIMESTAMP_LTZ(p). The resolved +-- common type itself is unit-tested in TypeCoercionSuite / AnsiTypeCoercionSuite, and the operator +-- wiring (schema and boolean outcomes for UNION/coalesce/CASE/IN/comparison) in +-- TimestampNanosWideningSuite; the cases below complement those by locking the resolved type with +-- typeof() and the end-to-end rendered values, by covering operators those suites do not +-- (greatest/least and the array/map constructors), and by exercising the mixed time-zone family +-- rule that has no TIMESTAMP_NTZ counterpart. Values span the min/max supported instants, the 1582 +-- Julian/Gregorian boundary (proleptic Gregorian), pre/post epoch, near-current values, varied +-- fractions / precisions, and non-standard source zones. Bare literals are interpreted in the +-- session zone (America/Los_Angeles) and so round-trip on rendering; an explicit source zone +-- (e.g. the sub-hour offsets Asia/Kolkata +05:30 and Asia/Kathmandu +05:45) shifts the rendered +-- wall clock deterministically. + +-- UNION ALL widens micro -> nanos: the minimum and maximum supported instants. 
+SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_LTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999') ORDER BY c; +-- UNION ALL widens nanos(7)/nanos(9) -> nanos(9): around the 1582 Julian/Gregorian boundary. +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ltz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ltz(9)) ORDER BY c; + +-- coalesce keeps the first non-null, widened: pre-epoch boundary read from a +05:30-offset zone. +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001 Asia/Kolkata' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999 UTC' :: timestamp_ltz(9)) AS v); +-- CASE WHEN unifies its branches: a near-current value read from a +05:45-offset zone. +SELECT typeof(v), v FROM (SELECT CASE WHEN true + THEN TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kathmandu' + ELSE '2026-06-21 10:16:30.987654321 UTC' :: timestamp_ltz(9) END AS v); + +-- nanos <-> DATE widening: the minimum DATE adopts the nanos family, midnight in the session zone. +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ltz(8)) AS v); + +-- greatest / least widen their arguments to the common nanosecond type and pick the extreme instant. +SELECT typeof(greatest(TIMESTAMP_LTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9))); +SELECT greatest(TIMESTAMP_LTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ltz(9), + TIMESTAMP_LTZ '2026-06-21 10:16:30.5'); +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ltz(9)); + +-- array() unifies element types and map() value types: a spread of eras, zones and precisions. 
+SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ltz(7), + TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kolkata', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)); +SELECT typeof(array(TIMESTAMP_LTZ '9999-12-31 23:59:59', + '0001-01-01 00:00:00.000000001' :: timestamp_ltz(9))); +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ltz(9), + 'max', TIMESTAMP_LTZ '9999-12-31 23:59:59.999999'); + +-- Mixed time-zone families widen to the LTZ family (mirrors TIMESTAMP + TIMESTAMP_NTZ -> TIMESTAMP). +-- A value-pinned case: the inserted cross-family cast reinterprets the NTZ wall clock as an instant +-- in the session zone (America/Los_Angeles) and the result is rendered back there, so it round-trips +-- to the same wall clock with the sub-microsecond digits preserved. This locks the cast's +-- sessionLocalTimeZone wiring -- a UTC misread would render a different instant. +SELECT typeof(v), v FROM (SELECT coalesce( + TIMESTAMP_NTZ '2026-06-21 10:16:30.123456789', + '1970-01-01 00:00:00.000000001 UTC' :: timestamp_ltz(9)) AS v); +-- The remaining mixed-family cases assert the resolved type only (varied precisions and eras). 
+SELECT typeof(c) FROM ( + SELECT TIMESTAMP_NTZ '1582-10-15 00:00:00' AS c + UNION ALL SELECT '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)); +SELECT typeof(coalesce('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '2026-06-21 10:16:30.123456789 UTC' :: timestamp_ltz(9))); +SELECT typeof(CASE WHEN true + THEN '1969-12-31 23:59:59.1234567' :: timestamp_ntz(7) + ELSE '1970-01-01 00:00:00.123456789 UTC' :: timestamp_ltz(9) END); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql index ac2443cfc611..df7406a9ec9e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql @@ -198,3 +198,54 @@ SELECT unix_nanos(TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999'); SELECT unix_nanos(TIMESTAMP_NTZ '1960-01-01 00:00:00.000000001'); -- NULL nanosecond timestamp. SELECT unix_nanos(NULL :: timestamp_ntz(9)); + +-- SPARK-57454: implicit type coercion / widening over nanosecond TIMESTAMP_NTZ(p). The resolved +-- common type itself is unit-tested in TypeCoercionSuite / AnsiTypeCoercionSuite, and the operator +-- wiring (schema and boolean outcomes for UNION/coalesce/CASE/IN/comparison) in +-- TimestampNanosWideningSuite; the cases below complement those by locking the resolved type with +-- typeof() and the end-to-end rendered values, by covering operators those suites do not +-- (greatest/least and the array/map constructors), and by spanning the value range: the min/max +-- supported timestamps, the 1582 Julian/Gregorian boundary (Spark uses the proleptic Gregorian +-- calendar), pre/post epoch, near-current values, and varied fractions / precisions. NTZ is +-- zone-independent, so the time-zone dimension is exercised in timestamp-ltz-nanos.sql instead. + +-- UNION ALL widens micro -> nanos: the minimum and maximum supported TIMESTAMP_NTZ values. 
+SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_NTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999') ORDER BY c; +-- UNION ALL widens nanos(7)/nanos(9) -> nanos(9): around the 1582 Julian/Gregorian boundary +-- (1582-10-05..14 are valid dates only under the proleptic Gregorian calendar). +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ntz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ntz(9)) ORDER BY c; + +-- coalesce keeps the first non-null, widened to the wider precision: pre-epoch boundary values. +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)) AS v); +-- CASE WHEN unifies its branches: a near-current value taken from the micro branch. +SELECT typeof(v), v FROM (SELECT CASE WHEN true + THEN TIMESTAMP_NTZ '2026-06-21 10:16:30' + ELSE '2026-06-21 10:16:30.987654321' :: timestamp_ntz(9) END AS v); + +-- nanos <-> DATE widening: the minimum DATE adopts the nanos family and renders at midnight. +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ntz(8)) AS v); + +-- greatest / least widen their arguments to the common nanosecond type and pick the extreme instant. +SELECT typeof(greatest(TIMESTAMP_NTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9))); +SELECT greatest(TIMESTAMP_NTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ntz(9), + TIMESTAMP_NTZ '2026-06-21 10:16:30.5'); +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)); + +-- array() unifies element types and map() value types: a spread of eras, fractions and precisions. 
+SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + TIMESTAMP_NTZ '2026-06-21 10:16:30', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9)); +SELECT typeof(array(TIMESTAMP_NTZ '9999-12-31 23:59:59', + '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9))); +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9), + 'max', TIMESTAMP_NTZ '9999-12-31 23:59:59.999999'); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out index 6a3585c414c2..ceec6d71ebad 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out @@ -939,3 +939,150 @@ SELECT timestamp_nanos(CAST(NULL AS BIGINT)) struct -- !query output NULL + + +-- !query +SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_LTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_LTZ '9999-12-31 23:59:59.999999999') ORDER BY c +-- !query schema +struct +-- !query output +timestamp_ltz(9) 0001-01-01 00:00:00 +timestamp_ltz(9) 9999-12-31 23:59:59.999999999 + + +-- !query +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ltz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ltz(9)) ORDER BY c +-- !query schema +struct +-- !query output +timestamp_ltz(9) 1582-10-04 12:30:45.1234567 +timestamp_ltz(9) 1582-10-15 23:59:59.123456789 + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001 Asia/Kolkata' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999 UTC' :: timestamp_ltz(9)) AS v) +-- !query schema +struct +-- !query output +timestamp_ltz(9) 1969-12-31 10:29:59.0000001 + + +-- !query +SELECT typeof(v), v FROM (SELECT CASE WHEN true + THEN TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kathmandu' + ELSE '2026-06-21 10:16:30.987654321 UTC' :: timestamp_ltz(9) END AS v) +-- !query schema +struct +-- !query 
output +timestamp_ltz(9) 2026-06-20 21:31:30 + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ltz(8)) AS v) +-- !query schema +struct +-- !query output +timestamp_ltz(8) 0001-01-01 00:00:00 + + +-- !query +SELECT typeof(greatest(TIMESTAMP_LTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9))) +-- !query schema +struct +-- !query output +timestamp_ltz(9) + + +-- !query +SELECT greatest(TIMESTAMP_LTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ltz(9), + TIMESTAMP_LTZ '2026-06-21 10:16:30.5') +-- !query schema +struct +-- !query output +2026-06-21 10:16:30.5 + + +-- !query +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ltz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query schema +struct +-- !query output +1969-12-31 23:59:59.999999999 + + +-- !query +SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ltz(7), + TIMESTAMP_LTZ '2026-06-21 10:16:30 Asia/Kolkata', + '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query schema +struct> +-- !query output +[0001-01-01 00:00:00.0000001,2026-06-20 21:46:30,9999-12-31 23:59:59.999999999] + + +-- !query +SELECT typeof(array(TIMESTAMP_LTZ '9999-12-31 23:59:59', + '0001-01-01 00:00:00.000000001' :: timestamp_ltz(9))) +-- !query schema +struct +-- !query output +array + + +-- !query +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ltz(9), + 'max', TIMESTAMP_LTZ '9999-12-31 23:59:59.999999') +-- !query schema +struct> +-- !query output +{"max":9999-12-31 23:59:59.999999,"min":0001-01-01 00:00:00.000000001} + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + TIMESTAMP_NTZ '2026-06-21 10:16:30.123456789', + '1970-01-01 00:00:00.000000001 UTC' :: timestamp_ltz(9)) AS v) +-- !query schema +struct +-- !query output +timestamp_ltz(9) 2026-06-21 10:16:30.123456789 + + +-- !query +SELECT typeof(c) FROM ( + SELECT TIMESTAMP_NTZ '1582-10-15 
00:00:00' AS c + UNION ALL SELECT '9999-12-31 23:59:59.999999999' :: timestamp_ltz(9)) +-- !query schema +struct +-- !query output +timestamp_ltz(9) +timestamp_ltz(9) + + +-- !query +SELECT typeof(coalesce('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '2026-06-21 10:16:30.123456789 UTC' :: timestamp_ltz(9))) +-- !query schema +struct +-- !query output +timestamp_ltz(9) + + +-- !query +SELECT typeof(CASE WHEN true + THEN '1969-12-31 23:59:59.1234567' :: timestamp_ntz(7) + ELSE '1970-01-01 00:00:00.123456789 UTC' :: timestamp_ltz(9) END) +-- !query schema +struct +-- !query output +timestamp_ltz(9) diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out index d5ea92ddcc37..ba21a069ba4c 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out @@ -776,3 +776,110 @@ SELECT unix_nanos(NULL :: timestamp_ntz(9)) struct -- !query output NULL + + +-- !query +SELECT typeof(c), c FROM ( + SELECT TIMESTAMP_NTZ '0001-01-01 00:00:00' AS c + UNION ALL SELECT TIMESTAMP_NTZ '9999-12-31 23:59:59.999999999') ORDER BY c +-- !query schema +struct +-- !query output +timestamp_ntz(9) 0001-01-01 00:00:00 +timestamp_ntz(9) 9999-12-31 23:59:59.999999999 + + +-- !query +SELECT typeof(c), c FROM ( + SELECT '1582-10-04 12:30:45.1234567' :: timestamp_ntz(7) AS c + UNION ALL SELECT '1582-10-15 23:59:59.123456789' :: timestamp_ntz(9)) ORDER BY c +-- !query schema +struct +-- !query output +timestamp_ntz(9) 1582-10-04 12:30:45.1234567 +timestamp_ntz(9) 1582-10-15 23:59:59.123456789 + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + '1969-12-31 23:59:59.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)) AS v) +-- !query schema +struct +-- !query output +timestamp_ntz(9) 1969-12-31 23:59:59.0000001 + + +-- !query +SELECT typeof(v), v 
FROM (SELECT CASE WHEN true + THEN TIMESTAMP_NTZ '2026-06-21 10:16:30' + ELSE '2026-06-21 10:16:30.987654321' :: timestamp_ntz(9) END AS v) +-- !query schema +struct +-- !query output +timestamp_ntz(9) 2026-06-21 10:16:30 + + +-- !query +SELECT typeof(v), v FROM (SELECT coalesce( + DATE '0001-01-01', '2020-01-01 00:00:00.12345678' :: timestamp_ntz(8)) AS v) +-- !query schema +struct +-- !query output +timestamp_ntz(8) 0001-01-01 00:00:00 + + +-- !query +SELECT typeof(greatest(TIMESTAMP_NTZ '0001-01-01 00:00:00', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9))) +-- !query schema +struct +-- !query output +timestamp_ntz(9) + + +-- !query +SELECT greatest(TIMESTAMP_NTZ '1500-03-01 12:00:00', + '1582-10-15 00:00:00.123456789' :: timestamp_ntz(9), + TIMESTAMP_NTZ '2026-06-21 10:16:30.5') +-- !query schema +struct +-- !query output +2026-06-21 10:16:30.5 + + +-- !query +SELECT least('1970-01-01 00:00:00.0000001' :: timestamp_ntz(7), + '1969-12-31 23:59:59.999999999' :: timestamp_ntz(9)) +-- !query schema +struct +-- !query output +1969-12-31 23:59:59.999999999 + + +-- !query +SELECT array('0001-01-01 00:00:00.0000001' :: timestamp_ntz(7), + TIMESTAMP_NTZ '2026-06-21 10:16:30', + '9999-12-31 23:59:59.999999999' :: timestamp_ntz(9)) +-- !query schema +struct> +-- !query output +[0001-01-01 00:00:00.0000001,2026-06-21 10:16:30,9999-12-31 23:59:59.999999999] + + +-- !query +SELECT typeof(array(TIMESTAMP_NTZ '9999-12-31 23:59:59', + '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9))) +-- !query schema +struct +-- !query output +array + + +-- !query +SELECT map('min', '0001-01-01 00:00:00.000000001' :: timestamp_ntz(9), + 'max', TIMESTAMP_NTZ '9999-12-31 23:59:59.999999') +-- !query schema +struct> +-- !query output +{"max":9999-12-31 23:59:59.999999,"min":0001-01-01 00:00:00.000000001} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosWideningSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosWideningSuiteBase.scala new 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql

import java.time.{Instant, LocalDate, LocalDateTime, ZoneId}

import org.apache.spark.SparkConf
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types._

/**
 * End-to-end tests for implicit type coercion / widening over the nanosecond-precision
 * timestamp types `TIMESTAMP_NTZ(p)` / `TIMESTAMP_LTZ(p)` (`p` in `[7, 9]`), part of the
 * nanosecond timestamp preview (SPARK-56822). Exercises the operators that rely on
 * `findWiderDateTimeType` (SPARK-57454): `UNION ALL`, `coalesce`, `IN`, `CASE WHEN`, and binary
 * comparisons, mixing the microsecond and nanosecond timestamp types across both time-zone
 * families. The two subclasses run every test with ANSI mode on and off.
 *
 * Besides the resolved result type, the tests compare the collected values wherever the expected
 * value is unambiguous. Asserting only `count()` is not enough: the widened column is not needed
 * to count rows, so the implicit casts may never be evaluated.
 *
 * The nanosecond timestamp types are gated behind a preview flag that is enabled by default under
 * tests (`Utils.isTesting`), so it is not set here. The session time zone is fixed so the
 * `TIMESTAMP_LTZ` values are deterministic. The Java 8 datetime API is enabled so the microsecond
 * `TIMESTAMP` / `TIMESTAMP_NTZ` / `DATE` columns accept `Instant` / `LocalDateTime` / `LocalDate`,
 * matching the external types of the nanosecond timestamp types (see `RowEncoder`).
 */
abstract class TimestampNanosWideningSuiteBase extends SharedSparkSession {

  override def sparkConf: SparkConf = super.sparkConf
    .set(SQLConf.SESSION_LOCAL_TIMEZONE.key, "America/Los_Angeles")
    .set(SQLConf.DATETIME_JAVA8API_ENABLED.key, "true")

  // Microsecond-aligned instants/local-date-times, so they are representable exactly at every
  // precision in [6, 9] and the widening never changes the stored value. The "B" values are
  // strictly later than the "A" values (2021 vs. 2020).
  private val instantA = Instant.parse("2020-01-01T00:00:00Z")
  private val instantB = Instant.parse("2021-07-15T12:34:56.000001Z")
  private val ldtA = LocalDateTime.parse("2020-01-01T00:00:00")
  private val ldtB = LocalDateTime.parse("2021-07-15T12:34:56.000001")

  /** Builds a one-row, one-column (`c`) DataFrame holding `value` with data type `dt`. */
  private def single(dt: DataType, value: Any): DataFrame =
    spark.createDataFrame(
      spark.sparkContext.parallelize(Seq(Row(value))),
      new StructType().add("c", dt))

  /** Builds a one-row DataFrame with columns `a` (`dt1`, `v1`) and `b` (`dt2`, `v2`). */
  private def twoCols(dt1: DataType, v1: Any, dt2: DataType, v2: Any): DataFrame =
    spark.createDataFrame(
      spark.sparkContext.parallelize(Seq(Row(v1, v2))),
      new StructType().add("a", dt1).add("b", dt2))

  /**
   * The instant denoted by the wall-clock time `ldt` in the session time zone, i.e. the value an
   * NTZ timestamp is expected to take once it is widened into the LTZ family. The zone is read
   * back from the session so it cannot drift from the one configured in `sparkConf`.
   */
  private def inSessionZone(ldt: LocalDateTime): Instant =
    ldt.atZone(ZoneId.of(spark.conf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))).toInstant

  test("SPARK-57454: UNION ALL widens nanosecond timestamps") {
    // micro <-> nanos within the same family widen to the nanos type.
    val ltz = single(TimestampType, instantA).union(single(TimestampLTZNanosType(9), instantB))
    assert(ltz.schema("c").dataType === TimestampLTZNanosType(9))
    checkAnswer(ltz, Seq(Row(instantA), Row(instantB)))

    val ntz = single(TimestampNTZType, ldtA).union(single(TimestampNTZNanosType(8), ldtB))
    assert(ntz.schema("c").dataType === TimestampNTZNanosType(8))
    checkAnswer(ntz, Seq(Row(ldtA), Row(ldtB)))

    // nanos(p1) <-> nanos(p2) within the same family widen to the max precision.
    val ltzNanos =
      single(TimestampLTZNanosType(7), instantA).union(single(TimestampLTZNanosType(9), instantB))
    assert(ltzNanos.schema("c").dataType === TimestampLTZNanosType(9))
    checkAnswer(ltzNanos, Seq(Row(instantA), Row(instantB)))

    // Mixed time-zone families widen to the LTZ family; the NTZ side is interpreted as a
    // wall-clock time in the session time zone.
    val mixed = single(TimestampType, instantA).union(single(TimestampNTZNanosType(9), ldtB))
    assert(mixed.schema("c").dataType === TimestampLTZNanosType(9))
    checkAnswer(mixed, Seq(Row(instantA), Row(inSessionZone(ldtB))))

    // nanos <-> date widen to the nanos type; the date becomes midnight of that day.
    val date = LocalDate.parse("2020-01-01")
    val withDate = single(DateType, date).union(single(TimestampNTZNanosType(7), ldtB))
    assert(withDate.schema("c").dataType === TimestampNTZNanosType(7))
    checkAnswer(withDate, Seq(Row(date.atStartOfDay()), Row(ldtB)))
  }

  test("SPARK-57454: coalesce widens nanosecond timestamps") {
    // `a` is non-null in both cases, so coalesce must return a's value at the wider type.
    val ltz = twoCols(TimestampLTZNanosType(7), instantA, TimestampLTZNanosType(9), instantB)
    val ltzRes = ltz.select(coalesce(col("a"), col("b")).as("c"))
    assert(ltzRes.schema("c").dataType === TimestampLTZNanosType(9))
    checkAnswer(ltzRes, Row(instantA))

    val ntz = twoCols(TimestampNTZType, ldtA, TimestampNTZNanosType(8), ldtB)
    val ntzRes = ntz.select(coalesce(col("a"), col("b")).as("c"))
    assert(ntzRes.schema("c").dataType === TimestampNTZNanosType(8))
    checkAnswer(ntzRes, Row(ldtA))
  }

  test("SPARK-57454: CASE WHEN widens nanosecond timestamps") {
    // a (instantA) is earlier than b (instantB), so the THEN branch is selected.
    val ltz = twoCols(TimestampType, instantA, TimestampLTZNanosType(9), instantB)
    val ltzRes = ltz.selectExpr("CASE WHEN a < b THEN a ELSE b END AS c")
    assert(ltzRes.schema("c").dataType === TimestampLTZNanosType(9))
    checkAnswer(ltzRes, Row(instantA))

    // Mixed time-zone families widen to the LTZ family.
    val mixed = twoCols(TimestampNTZType, ldtA, TimestampLTZNanosType(9), instantB)
    val mixedRes = mixed.selectExpr("CASE WHEN true THEN a ELSE b END AS c")
    assert(mixedRes.schema("c").dataType === TimestampLTZNanosType(9))
  }

  test("SPARK-57454: IN widens nanosecond timestamps") {
    // a (p=7) and b (p=9) hold different instants, so `a IN (b)` is false but still type-checks.
    val ltz = twoCols(TimestampLTZNanosType(7), instantA, TimestampLTZNanosType(9), instantB)
    checkAnswer(ltz.selectExpr("a IN (b)"), Row(false))

    val ntz = twoCols(TimestampNTZType, ldtA, TimestampNTZNanosType(9), ldtB)
    checkAnswer(ntz.selectExpr("a IN (b)"), Row(false))

    // The same value stored at different precisions must match, so that a coercion which made
    // every comparison false could not pass this test.
    val ltzSame = twoCols(TimestampType, instantA, TimestampLTZNanosType(9), instantA)
    checkAnswer(ltzSame.selectExpr("a IN (b)"), Row(true))

    val ntzSame = twoCols(TimestampNTZNanosType(7), ldtB, TimestampNTZNanosType(9), ldtB)
    checkAnswer(ntzSame.selectExpr("a IN (b)"), Row(true))
  }

  test("SPARK-57454: binary comparison widens nanosecond timestamps") {
    // Equal absolute instants stored at different precisions compare equal.
    val ltzEq = twoCols(TimestampType, instantA, TimestampLTZNanosType(9), instantA)
    checkAnswer(ltzEq.selectExpr("a = b", "a < b"), Row(true, false))

    // b (instantB) is strictly later than a (instantA).
    val ltzLt = twoCols(TimestampLTZNanosType(7), instantA, TimestampLTZNanosType(9), instantB)
    checkAnswer(ltzLt.selectExpr("a = b", "a < b"), Row(false, true))

    val ntzEq = twoCols(TimestampNTZType, ldtA, TimestampNTZNanosType(9), ldtA)
    checkAnswer(ntzEq.selectExpr("a = b", "a < b"), Row(true, false))

    // Same ordering check for the NTZ family: ldtB is strictly later than ldtA.
    val ntzLt = twoCols(TimestampNTZNanosType(7), ldtA, TimestampNTZNanosType(9), ldtB)
    checkAnswer(ntzLt.selectExpr("a = b", "a < b"), Row(false, true))
  }
}

// Runs the nanosecond timestamp widening tests with ANSI mode enabled explicitly.
class TimestampNanosWideningAnsiOnSuite extends TimestampNanosWideningSuiteBase {
  override def sparkConf: SparkConf = super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "true")
}

// Runs the nanosecond timestamp widening tests with ANSI mode disabled explicitly.
class TimestampNanosWideningAnsiOffSuite extends TimestampNanosWideningSuiteBase {
  override def sparkConf: SparkConf = super.sparkConf.set(SQLConf.ANSI_ENABLED.key, "false")
}