From a8512e698bc0741060e3553672ca5004bedf053f Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Sat, 20 Jun 2026 22:58:45 -0700 Subject: [PATCH 1/6] [SPARK-57575][SQL] Support TIME type in to_char/to_varchar formatting --- .../expressions/datetimeExpressions.scala | 71 +++++++++++++------ .../expressions/DateExpressionsSuite.scala | 22 ++++++ .../datetime-formatting-legacy.sql.out | 14 ++++ .../datetime-formatting.sql.out | 14 ++++ .../sql-tests/inputs/datetime-formatting.sql | 4 ++ .../datetime-formatting-legacy.sql.out | 16 +++++ .../results/datetime-formatting.sql.out | 16 +++++ 7 files changed, 136 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 3f773e5bb6dc5..b124227bd1fab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimeFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT @@ -1231,34 +1231,63 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti def this(left: Expression, right: Expression) = this(left, right, None) override def inputTypes: Seq[AbstractDataType] = - Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true)) + Seq(TypeCollection(TimestampType, TimeType(TimeType.DEFAULT_PRECISION)), StringTypeWithCollation(supportsTrimCollation = true)) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) override protected def nullSafeEval(timestamp: Any, format: Any): Any = { - val formatter = formatterOption.getOrElse(getFormatter(format.toString)) - UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long])) + left.dataType match { + case _: TimeType => + val tf = timeFormatterOption.getOrElse( + TimeFormatter(format.toString, isParsing = false)) + UTF8String.fromString(tf.format(timestamp.asInstanceOf[Long])) + case _ => + val formatter = formatterOption.getOrElse(getFormatter(format.toString)) + UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long])) + } } + @transient private lazy val timeFormatterOption: Option[TimeFormatter] = + if (left.dataType.isInstanceOf[TimeType] && right.foldable) { + Option(right.eval()).map(fmt => TimeFormatter(fmt.toString, isParsing = false)) + } else None + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - formatterOption.map { tf => - val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf) - defineCodeGen(ctx, ev, (timestamp, _) => { - s"""UTF8String.fromString($timestampFormatter.format($timestamp))""" - }) - }.getOrElse { - val tf = TimestampFormatter.getClass.getName.stripSuffix("$") - val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$") - val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - defineCodeGen(ctx, ev, (timestamp, format) => { - s"""|UTF8String.fromString($tf$$.MODULE$$.apply( - | $format.toString(), - | $zid, - | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(), - | false) - |.format($timestamp))""".stripMargin - }) + left.dataType match { + case _: TimeType => + timeFormatterOption.map { tf => + val timeFormatter = ctx.addReferenceObj("timeFormatter", tf) + defineCodeGen(ctx, ev, (time, _) => { + s"""UTF8String.fromString($timeFormatter.format($time))""" + }) + }.getOrElse { + val tf = TimeFormatter.getClass.getName.stripSuffix("$") + defineCodeGen(ctx, ev, (time, format) => { + s"""|UTF8String.fromString($tf$$.MODULE$$.apply( + | $format.toString(), false) + |.format($time))""".stripMargin + }) + } + case _ => + formatterOption.map { tf => + val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf) + defineCodeGen(ctx, ev, (timestamp, _) => { + s"""UTF8String.fromString($timestampFormatter.format($timestamp))""" + }) + }.getOrElse { + val tf = TimestampFormatter.getClass.getName.stripSuffix("$") + val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$") + val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + defineCodeGen(ctx, ev, (timestamp, format) => { + s"""|UTF8String.fromString($tf$$.MODULE$$.apply( + | $format.toString(), + | $zid, + | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(), + | false) + |.format($timestamp))""".stripMargin + }) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index d6b18a9370e0c..f4e82e01b96c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -328,6 +328,28 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-57575: DateFormat with TimeType (to_char/to_varchar)") { + // 12:13:14 = (12*3600 + 13*60 + 14) seconds, stored as nanoseconds + val timeMicros = (12L * 3600 + 13 * 60 + 14) * 1000000000L + val timeLit = Literal.create(timeMicros, TimeType(TimeType.DEFAULT_PRECISION)) + + checkEvaluation(DateFormatClass(timeLit, Literal("HH:mm:ss"), UTC_OPT), "12:13:14") + checkEvaluation(DateFormatClass(timeLit, Literal("HH"), UTC_OPT), "12") + checkEvaluation(DateFormatClass(timeLit, Literal("mm"), UTC_OPT), "13") + checkEvaluation(DateFormatClass(timeLit, Literal("ss"), UTC_OPT), "14") + + // Null handling + checkEvaluation( + DateFormatClass(Literal.create(null, TimeType(TimeType.DEFAULT_PRECISION)), + Literal("HH:mm:ss"), UTC_OPT), null) + + // Date-only pattern fields should error for TIME input + val datePatternExpr = DateFormatClass(timeLit, Literal("yyyy-MM-dd"), UTC_OPT) + intercept[Exception] { + datePatternExpr.eval(InternalRow(timeMicros, UTF8String.fromString("yyyy-MM-dd"))) + } + } + test("Hour") { assert(Hour(Literal.create(null, DateType), UTC_OPT).resolved === false) assert(Hour(Literal(ts), UTC_OPT).resolved) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out index 7c5f1260b6487..b61a35681b0a0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out @@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: -- !query analysis Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x] +- OneRowRelation + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query analysis +Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x] ++- OneRowRelation + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query analysis +Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out index 7c5f1260b6487..b61a35681b0a0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out @@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: -- !query analysis Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x] +- OneRowRelation + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query analysis +Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x] ++- OneRowRelation + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query analysis +Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql index 3618bb5c399f9..aefa8e8dd20f8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql @@ -71,3 +71,7 @@ select col, date_format(col, ''), to_char(col, ''), to_varchar(col, '') from v; select date_format(date'2023-08-18', 'yyyy-MM-dd'), to_char(date'2023-08-18', 'yyyy-MM-dd'), to_varchar(date'2023-08-18', 'yyyy-MM-dd'); select date_format(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_char(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_varchar(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_char(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_varchar(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'); + +-- SPARK-57575: TIME type formatting +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss'); +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'); diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out index ba0e9421e5b52..401aff976e9e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out @@ -462,3 +462,19 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: struct -- !query output 2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123 + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query schema +struct +-- !query output +12:13:14 12:13:14 + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query schema +struct +-- !query output +23:59:59.123456 23:59:59.123456 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out index f659dbf0083d0..76a02428056fc 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out @@ -450,3 +450,19 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: struct -- !query output 2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456 + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query schema +struct +-- !query output +12:13:14 12:13:14 + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query schema +struct +-- !query output +23:59:59.123456 23:59:59.123456 From eb1e1e715fcf5775db0bd3011b7851b7425f44e5 Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Sun, 21 Jun 2026 08:46:48 -0700 Subject: [PATCH 2/6] Address MaxGekk's review: use AnyTimeType, pin DateTimeException, add TIME(0) test, rename variable --- .../catalyst/expressions/datetimeExpressions.scala | 2 +- .../catalyst/expressions/DateExpressionsSuite.scala | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index b124227bd1fab..7a83c93b0143a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1231,7 +1231,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti def this(left: Expression, right: Expression) = this(left, right, None) override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(TimestampType, TimeType(TimeType.DEFAULT_PRECISION)), StringTypeWithCollation(supportsTrimCollation = true)) + Seq(TypeCollection(TimestampType, AnyTimeType), StringTypeWithCollation(supportsTrimCollation = true)) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index f4e82e01b96c7..ae3188c53f15d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -330,14 +330,19 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-57575: DateFormat with TimeType (to_char/to_varchar)") { // 12:13:14 = (12*3600 + 13*60 + 14) seconds, stored as nanoseconds - val timeMicros = (12L * 3600 + 13 * 60 + 14) * 1000000000L - val timeLit = Literal.create(timeMicros, TimeType(TimeType.DEFAULT_PRECISION)) + val timeNanos = (12L * 3600 + 13 * 60 + 14) * 1000000000L + val timeLit = Literal.create(timeNanos, TimeType(TimeType.DEFAULT_PRECISION)) checkEvaluation(DateFormatClass(timeLit, Literal("HH:mm:ss"), UTC_OPT), "12:13:14") checkEvaluation(DateFormatClass(timeLit, Literal("HH"), UTC_OPT), "12") checkEvaluation(DateFormatClass(timeLit, Literal("mm"), UTC_OPT), "13") checkEvaluation(DateFormatClass(timeLit, Literal("ss"), UTC_OPT), "14") + // Non-default precision (TIME(0)) should also work + val timeNanosLowPrec = (9L * 3600 + 30 * 60 + 0) * 1000000000L + val timeLitLowPrec = Literal.create(timeNanosLowPrec, TimeType(0)) + checkEvaluation(DateFormatClass(timeLitLowPrec, Literal("HH:mm:ss"), UTC_OPT), "09:30:00") + // Null handling checkEvaluation( DateFormatClass(Literal.create(null, TimeType(TimeType.DEFAULT_PRECISION)), @@ -345,8 +350,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // Date-only pattern fields should error for TIME input val datePatternExpr = DateFormatClass(timeLit, Literal("yyyy-MM-dd"), UTC_OPT) - intercept[Exception] { - datePatternExpr.eval(InternalRow(timeMicros, UTF8String.fromString("yyyy-MM-dd"))) + intercept[java.time.DateTimeException] { + datePatternExpr.eval(InternalRow(timeNanos, UTF8String.fromString("yyyy-MM-dd"))) } } From 26f52856dbad615c289819b91e14299cebc17351 Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Sun, 21 Jun 2026 13:10:59 -0700 Subject: [PATCH 3/6] Wrap date-pattern rejection in proper Spark error (INVALID_PARAMETER_VALUE.PATTERN) --- .../expressions/datetimeExpressions.scala | 30 +++++++++++++++---- .../expressions/DateExpressionsSuite.scala | 15 ++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 7a83c93b0143a..3daa02cc7319f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1241,7 +1241,8 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti case _: TimeType => val tf = timeFormatterOption.getOrElse( TimeFormatter(format.toString, isParsing = false)) - UTF8String.fromString(tf.format(timestamp.asInstanceOf[Long])) + DateFormatClass.formatTimeWithError( + tf, timestamp.asInstanceOf[Long], "to_char", format.toString) case _ => val formatter = formatterOption.getOrElse(getFormatter(format.toString)) UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long])) @@ -1256,17 +1257,23 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { left.dataType match { case _: TimeType => + val errClass = QueryExecutionErrors.getClass.getName.stripSuffix("$") timeFormatterOption.map { tf => val timeFormatter = ctx.addReferenceObj("timeFormatter", tf) + val funcName = ctx.addReferenceObj("funcName", "to_char") + val fmtStr = ctx.addReferenceObj("fmtStr", right.eval().toString) defineCodeGen(ctx, ev, (time, _) => { - s"""UTF8String.fromString($timeFormatter.format($time))""" + s"""|((org.apache.spark.unsafe.types.UTF8String) + |org.apache.spark.sql.catalyst.expressions.DateFormatClass + |.formatTimeWithError($timeFormatter, $time, $funcName, $fmtStr))""".stripMargin.replaceAll("\n", "") }) }.getOrElse { val tf = TimeFormatter.getClass.getName.stripSuffix("$") defineCodeGen(ctx, ev, (time, format) => { - s"""|UTF8String.fromString($tf$$.MODULE$$.apply( - | $format.toString(), false) - |.format($time))""".stripMargin + s"""|((org.apache.spark.unsafe.types.UTF8String) + |org.apache.spark.sql.catalyst.expressions.DateFormatClass + |.formatTimeWithError($tf$$.MODULE$$.apply( + | $format.toString(), false), $time, "to_char", $format.toString()))""".stripMargin.replaceAll("\n", "") }) } case _ => @@ -1304,6 +1311,19 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME) } +object DateFormatClass { + /** Helper for codegen: formats time with proper Spark error on invalid pattern. */ + def formatTimeWithError( + tf: TimeFormatter, nanos: Long, funcName: String, pattern: String): UTF8String = { + try { + UTF8String.fromString(tf.format(nanos)) + } catch { + case e: java.time.DateTimeException => + throw QueryExecutionErrors.invalidPatternError(funcName, pattern, e) + } + } +} + /** * Converts time string with given pattern. * Deterministic version of [[UnixTimestamp]], must have at least one parameter. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index ae3188c53f15d..237a5080e8992 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -28,7 +28,7 @@ import scala.language.postfixOps import scala.reflect.ClassTag import scala.util.Random -import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkRuntimeException, SparkUpgradeException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch @@ -348,11 +348,16 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { DateFormatClass(Literal.create(null, TimeType(TimeType.DEFAULT_PRECISION)), Literal("HH:mm:ss"), UTC_OPT), null) - // Date-only pattern fields should error for TIME input + // Date-only pattern fields should error for TIME input with Spark error val datePatternExpr = DateFormatClass(timeLit, Literal("yyyy-MM-dd"), UTC_OPT) - intercept[java.time.DateTimeException] { - datePatternExpr.eval(InternalRow(timeNanos, UTF8String.fromString("yyyy-MM-dd"))) - } + checkErrorInExpression[SparkRuntimeException]( + datePatternExpr, + condition = "INVALID_PARAMETER_VALUE.PATTERN", + parameters = Map( + "parameter" -> "`regexp`", + "functionName" -> "`to_char`", + "value" -> "'yyyy-MM-dd'") + ) } test("Hour") { From f42c3e00fd1adf60116011ffa4f6f17a2c753f7b Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Sun, 21 Jun 2026 22:34:36 -0700 Subject: [PATCH 4/6] Fix scalastyle: break long lines in codegen to stay under 100 chars --- .../expressions/datetimeExpressions.scala | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 3daa02cc7319f..5d8fe42a50b52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1231,7 +1231,8 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti def this(left: Expression, right: Expression) = this(left, right, None) override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(TimestampType, AnyTimeType), StringTypeWithCollation(supportsTrimCollation = true)) + Seq(TypeCollection(TimestampType, AnyTimeType), + StringTypeWithCollation(supportsTrimCollation = true)) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1264,16 +1265,21 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti val fmtStr = ctx.addReferenceObj("fmtStr", right.eval().toString) defineCodeGen(ctx, ev, (time, _) => { s"""|((org.apache.spark.unsafe.types.UTF8String) - |org.apache.spark.sql.catalyst.expressions.DateFormatClass - |.formatTimeWithError($timeFormatter, $time, $funcName, $fmtStr))""".stripMargin.replaceAll("\n", "") + |org.apache.spark.sql.catalyst.expressions + |.DateFormatClass.formatTimeWithError( + |$timeFormatter, $time, + |$funcName, $fmtStr))""".stripMargin.replaceAll("\n", "") }) }.getOrElse { val tf = TimeFormatter.getClass.getName.stripSuffix("$") defineCodeGen(ctx, ev, (time, format) => { s"""|((org.apache.spark.unsafe.types.UTF8String) - |org.apache.spark.sql.catalyst.expressions.DateFormatClass - |.formatTimeWithError($tf$$.MODULE$$.apply( - | $format.toString(), false), $time, "to_char", $format.toString()))""".stripMargin.replaceAll("\n", "") + |org.apache.spark.sql.catalyst.expressions + |.DateFormatClass.formatTimeWithError( + |$tf$$.MODULE$$.apply( + |$format.toString(), false), + |$time, "to_char", + |$format.toString()))""".stripMargin.replaceAll("\n", "") }) } case _ => From 3b1b06b547ed3359ec7121181d71fe00e8c1986e Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Mon, 22 Jun 2026 13:14:48 -0700 Subject: [PATCH 5/6] Address MaxGekk r3: use prettyName in error, remove dead errClass, fix scaladoc --- .../expressions/datetimeExpressions.scala | 23 +++++++++++++------ .../expressions/DateExpressionsSuite.scala | 4 ++-- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 5d8fe42a50b52..268d86852c3c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -25,7 +25,7 @@ import java.util.Locale import org.apache.commons.text.StringEscapeUtils -import org.apache.spark.{SparkException, SparkIllegalArgumentException} +import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} @@ -1243,7 +1243,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti val tf = timeFormatterOption.getOrElse( TimeFormatter(format.toString, isParsing = false)) DateFormatClass.formatTimeWithError( - tf, timestamp.asInstanceOf[Long], "to_char", format.toString) + tf, timestamp.asInstanceOf[Long], prettyName, format.toString) case _ => val formatter = formatterOption.getOrElse(getFormatter(format.toString)) UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long])) @@ -1258,10 +1258,9 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { left.dataType match { case _: TimeType => - val errClass = QueryExecutionErrors.getClass.getName.stripSuffix("$") timeFormatterOption.map { tf => val timeFormatter = ctx.addReferenceObj("timeFormatter", tf) - val funcName = ctx.addReferenceObj("funcName", "to_char") + val funcName = ctx.addReferenceObj("funcName", prettyName) val fmtStr = ctx.addReferenceObj("fmtStr", right.eval().toString) defineCodeGen(ctx, ev, (time, _) => { s"""|((org.apache.spark.unsafe.types.UTF8String) @@ -1272,13 +1271,14 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti }) }.getOrElse { val tf = TimeFormatter.getClass.getName.stripSuffix("$") + val funcName = ctx.addReferenceObj("funcName", prettyName) defineCodeGen(ctx, ev, (time, format) => { s"""|((org.apache.spark.unsafe.types.UTF8String) |org.apache.spark.sql.catalyst.expressions |.DateFormatClass.formatTimeWithError( |$tf$$.MODULE$$.apply( |$format.toString(), false), - |$time, "to_char", + |$time, $funcName, |$format.toString()))""".stripMargin.replaceAll("\n", "") }) } @@ -1318,14 +1318,23 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti } object DateFormatClass { - /** Helper for codegen: formats time with proper Spark error on invalid pattern. */ + /** + * Formats a TIME value, mapping an invalid pattern to a Spark error. + * Used by both eval and codegen. + */ def formatTimeWithError( tf: TimeFormatter, nanos: Long, funcName: String, pattern: String): UTF8String = { try { UTF8String.fromString(tf.format(nanos)) } catch { case e: java.time.DateTimeException => - throw QueryExecutionErrors.invalidPatternError(funcName, pattern, e) + throw new SparkRuntimeException( + errorClass = "INVALID_PARAMETER_VALUE.PATTERN", + messageParameters = Map( + "parameter" -> toSQLId("format"), + "functionName" -> toSQLId(funcName), + "value" -> toSQLValue(pattern, StringType)), + cause = e) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 237a5080e8992..4a797b8b35753 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -354,8 +354,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { datePatternExpr, condition = "INVALID_PARAMETER_VALUE.PATTERN", parameters = Map( - "parameter" -> "`regexp`", - "functionName" -> "`to_char`", + "parameter" -> "`format`", + "functionName" -> "`date_format`", "value" -> "'yyyy-MM-dd'") ) } From 821ac2e833731d1b3270a26c7e8ff033599ec385 Mon Sep 17 00:00:00 2001 From: shrirangmhalgi Date: Mon, 22 Jun 2026 14:26:54 -0700 Subject: [PATCH 6/6] Guard SimplifyDateTimeConversions case 1 to skip TimeType child (sub-micro precision safety) --- .../sql/catalyst/optimizer/expressions.scala | 5 ++-- .../SimplifyDateTimeConversionsSuite.scala | 27 ++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 929ffdef2b52f..de847dddc9357 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -1207,7 +1207,7 @@ object SimplifyDateTimeConversions extends Rule[LogicalPlan] { // original string is in the same format. case DateFormatClass( GetTimestamp( - e @ DateFormatClass(_, pattern, timeZoneId), + e @ DateFormatClass(child, pattern, timeZoneId), pattern2, TimestampType, _, @@ -1216,7 +1216,8 @@ object SimplifyDateTimeConversions extends Rule[LogicalPlan] { pattern3, timeZoneId3) if pattern.semanticEquals(pattern2) && pattern.semanticEquals(pattern3) - && timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3 => + && timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3 + && !child.dataType.isInstanceOf[TimeType] => e // Remove a timestamp to string conversion followed by a string to timestamp conversions if diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala index 8e7013f2df038..9b31c3409371b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, GetTimestamp} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DateFormatClass, GetTimestamp} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -66,4 +66,29 @@ class SimplifyDateTimeConversionsSuite extends PlanTest { comparePlans(optimized, expected) } + + test("SPARK-57575: SimplifyDateTimeConversions skips TimeType child") { + val timeAttr = AttributeReference("t", TimeType(TimeType.NANOS_PRECISION))() + val timeRelation = LocalRelation(timeAttr) + val pattern = "HH:mm:ss.SSSSSSSSS" + + val df = DateFormatClass(timeAttr, pattern) + + // date_format(to_timestamp(date_format(time_col, p), p), p) should NOT simplify + // because TIME(9) -> Timestamp truncates sub-micro precision (nanos lost) + val originalQuery = timeRelation + .select( + DateFormatClass( + GetTimestamp( + df, + pattern, + TimestampType), + pattern) as "c1") + .analyze + + val optimized = Optimize.execute(originalQuery) + + // Should NOT be simplified — optimized plan should equal original (no rewrite) + comparePlans(optimized, originalQuery) + } }