diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 3f773e5bb6dc5..268d86852c3c0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -25,7 +25,7 @@ import java.util.Locale
 
 import org.apache.commons.text.StringEscapeUtils
 
-import org.apache.spark.{SparkException, SparkIllegalArgumentException}
+import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkRuntimeException}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult}
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
 import org.apache.spark.sql.catalyst.trees.TreePattern._
-import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimeFormatter, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT
@@ -1231,34 +1231,89 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
   def this(left: Expression, right: Expression) = this(left, right, None)
 
   override def inputTypes: Seq[AbstractDataType] =
-    Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true))
+    Seq(TypeCollection(TimestampType, AnyTimeType),
+      StringTypeWithCollation(supportsTrimCollation = true))
 
   override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
     copy(timeZoneId = Option(timeZoneId))
 
   override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
-    val formatter = formatterOption.getOrElse(getFormatter(format.toString))
-    UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
+    // A TIME input is formatted with a TimeFormatter through formatTimeWithError, which turns a
+    // java.time.DateTimeException raised while formatting into a Spark error. Any other input
+    // takes the timestamp path.
+    left.dataType match {
+      case _: TimeType =>
+        val tf = timeFormatterOption.getOrElse(
+          TimeFormatter(format.toString, isParsing = false))
+        DateFormatClass.formatTimeWithError(
+          tf, timestamp.asInstanceOf[Long], prettyName, format.toString)
+      case _ =>
+        val formatter = formatterOption.getOrElse(getFormatter(format.toString))
+        UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
+    }
   }
 
+  // Lazily built TimeFormatter for a constant format: defined only when the input is TIME and
+  // the format expression is foldable and evaluates to a non-null value. Otherwise None, and
+  // the eval and codegen paths build a formatter from the runtime format string instead.
+  @transient private lazy val timeFormatterOption: Option[TimeFormatter] =
+    if (left.dataType.isInstanceOf[TimeType] && right.foldable) {
+      Option(right.eval()).map(fmt => TimeFormatter(fmt.toString, isParsing = false))
+    } else None
+
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    formatterOption.map { tf =>
-      val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf)
-      defineCodeGen(ctx, ev, (timestamp, _) => {
-        s"""UTF8String.fromString($timestampFormatter.format($timestamp))"""
-      })
-    }.getOrElse {
-      val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
-      val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
-      val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
-      defineCodeGen(ctx, ev, (timestamp, format) => {
-        s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
-            | $format.toString(),
-            | $zid,
-            | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
-            | false)
-            |.format($timestamp))""".stripMargin
-      })
+    left.dataType match {
+      case _: TimeType =>
+        // Constant format: hand the pre-built formatter, the function name and the format
+        // string to the generated code as reference objects.
+        timeFormatterOption.map { tf =>
+          val timeFormatter = ctx.addReferenceObj("timeFormatter", tf)
+          val funcName = ctx.addReferenceObj("funcName", prettyName)
+          val fmtStr = ctx.addReferenceObj("fmtStr", right.eval().toString)
+          // replaceAll drops the newlines left by stripMargin: the call is emitted as one line.
+          defineCodeGen(ctx, ev, (time, _) => {
+            s"""|((org.apache.spark.unsafe.types.UTF8String)
+                |org.apache.spark.sql.catalyst.expressions
+                |.DateFormatClass.formatTimeWithError(
+                |$timeFormatter, $time,
+                |$funcName, $fmtStr))""".stripMargin.replaceAll("\n", "")
+          })
+        }.getOrElse {
+          // No pre-built formatter: the generated code creates a TimeFormatter from the format
+          // value through the TimeFormatter companion's apply each time it runs.
+          val tf = TimeFormatter.getClass.getName.stripSuffix("$")
+          val funcName = ctx.addReferenceObj("funcName", prettyName)
+          defineCodeGen(ctx, ev, (time, format) => {
+            s"""|((org.apache.spark.unsafe.types.UTF8String)
+                |org.apache.spark.sql.catalyst.expressions
+                |.DateFormatClass.formatTimeWithError(
+                |$tf$$.MODULE$$.apply(
+                |$format.toString(), false),
+                |$time, $funcName,
+                |$format.toString()))""".stripMargin.replaceAll("\n", "")
+          })
+        }
+      case _ =>
+        // Timestamp input: use formatterOption when it is defined, otherwise build the
+        // TimestampFormatter inside the generated code.
+        formatterOption.map { tf =>
+          val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf)
+          defineCodeGen(ctx, ev, (timestamp, _) => {
+            s"""UTF8String.fromString($timestampFormatter.format($timestamp))"""
+          })
+        }.getOrElse {
+          val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
+          val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
+          val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
+          defineCodeGen(ctx, ev, (timestamp, format) => {
+            s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
+                | $format.toString(),
+                | $zid,
+                | $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
+                | false)
+                |.format($timestamp))""".stripMargin
+          })
+        }
     }
   }
 
@@ -1275,6 +1330,37 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
   final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
 }
 
+object DateFormatClass {
+  /**
+   * Formats a TIME value with `tf`, rethrowing a formatting failure as a Spark error.
+   * Used by both eval and codegen.
+   *
+   * @param tf formatter applied to the value
+   * @param nanos TIME value to format, handed to `tf.format` as a Long
+   * @param funcName function name reported in the error, quoted with `toSQLId`
+   * @param pattern format pattern reported as the offending value in the error
+   * @return the formatted text as a UTF8String
+   * @throws SparkRuntimeException with error class INVALID_PARAMETER_VALUE.PATTERN when
+   *         `tf.format` throws a java.time.DateTimeException; the original exception is
+   *         attached as the cause
+   */
+  def formatTimeWithError(
+      tf: TimeFormatter, nanos: Long, funcName: String, pattern: String): UTF8String = {
+    try {
+      UTF8String.fromString(tf.format(nanos))
+    } catch {
+      case e: java.time.DateTimeException =>
+        throw new SparkRuntimeException(
+          errorClass = "INVALID_PARAMETER_VALUE.PATTERN",
+          messageParameters = Map(
+            "parameter" -> toSQLId("format"),
+            "functionName" -> toSQLId(funcName),
+            "value" -> toSQLValue(pattern, StringType)),
+          cause = e)
+    }
+  }
+}
+
 /**
  * Converts time string with given pattern.
  * Deterministic version of [[UnixTimestamp]], must have at least one parameter.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 929ffdef2b52f..de847dddc9357 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -1207,7 +1207,7 @@ object SimplifyDateTimeConversions extends Rule[LogicalPlan] {
       // original string is in the same format.
       case DateFormatClass(
           GetTimestamp(
-            e @ DateFormatClass(_, pattern, timeZoneId),
+            e @ DateFormatClass(child, pattern, timeZoneId),
             pattern2,
             TimestampType,
             _,
@@ -1216,7 +1216,8 @@ object SimplifyDateTimeConversions extends Rule[LogicalPlan] {
           pattern3,
           timeZoneId3)
         if pattern.semanticEquals(pattern2) && pattern.semanticEquals(pattern3)
-          && timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3 =>
+          && timeZoneId == timeZoneId2 && timeZoneId == timeZoneId3
+          && !child.dataType.isInstanceOf[TimeType] =>
         e
 
       // Remove a timestamp to string conversion followed by a string to timestamp conversions if
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index d6b18a9370e0c..4a797b8b35753 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -28,7 +28,7 @@ import scala.language.postfixOps
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException}
+import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkRuntimeException, SparkUpgradeException}
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
@@ -328,6 +328,41 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     }
   }
 
+  test("SPARK-57575: DateFormat with TimeType (to_char/to_varchar)") {
+    // Covers DateFormatClass over TIME literals: single-field and combined patterns, a
+    // non-default precision, a null input, and a date-field pattern that must raise an error.
+
+    // 12:13:14 = (12*3600 + 13*60 + 14) seconds, stored as nanoseconds
+    val timeNanos = (12L * 3600 + 13 * 60 + 14) * 1000000000L
+    val timeLit = Literal.create(timeNanos, TimeType(TimeType.DEFAULT_PRECISION))
+
+    checkEvaluation(DateFormatClass(timeLit, Literal("HH:mm:ss"), UTC_OPT), "12:13:14")
+    checkEvaluation(DateFormatClass(timeLit, Literal("HH"), UTC_OPT), "12")
+    checkEvaluation(DateFormatClass(timeLit, Literal("mm"), UTC_OPT), "13")
+    checkEvaluation(DateFormatClass(timeLit, Literal("ss"), UTC_OPT), "14")
+
+    // Non-default precision (TIME(0)) should also work
+    val timeNanosLowPrec = (9L * 3600 + 30 * 60 + 0) * 1000000000L
+    val timeLitLowPrec = Literal.create(timeNanosLowPrec, TimeType(0))
+    checkEvaluation(DateFormatClass(timeLitLowPrec, Literal("HH:mm:ss"), UTC_OPT), "09:30:00")
+
+    // Null handling
+    checkEvaluation(
+      DateFormatClass(Literal.create(null, TimeType(TimeType.DEFAULT_PRECISION)),
+        Literal("HH:mm:ss"), UTC_OPT), null)
+
+    // Date-only pattern fields should error for TIME input with Spark error
+    val datePatternExpr = DateFormatClass(timeLit, Literal("yyyy-MM-dd"), UTC_OPT)
+    checkErrorInExpression[SparkRuntimeException](
+      datePatternExpr,
+      condition = "INVALID_PARAMETER_VALUE.PATTERN",
+      parameters = Map(
+        "parameter" -> "`format`",
+        "functionName" -> "`date_format`",
+        "value" -> "'yyyy-MM-dd'")
+    )
+  }
+
   test("Hour") {
     assert(Hour(Literal.create(null, DateType), UTC_OPT).resolved === false)
     assert(Hour(Literal(ts), UTC_OPT).resolved)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala
index 8e7013f2df038..9b31c3409371b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyDateTimeConversionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, GetTimestamp}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DateFormatClass, GetTimestamp}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -66,4 +66,30 @@ class SimplifyDateTimeConversionsSuite extends PlanTest {
 
     comparePlans(optimized, expected)
   }
+
+  test("SPARK-57575: SimplifyDateTimeConversions skips TimeType child") {
+    val timeAttr = AttributeReference("t", TimeType(TimeType.NANOS_PRECISION))()
+    val timeRelation = LocalRelation(timeAttr)
+    val pattern = "HH:mm:ss.SSSSSSSSS"
+
+    // Inner TIME -> string conversion; the rule must not return it as the simplified result.
+    val df = DateFormatClass(timeAttr, pattern)
+
+    // date_format(to_timestamp(date_format(time_col, p), p), p) should NOT simplify
+    // because TIME(9) -> Timestamp truncates sub-micro precision (nanos lost)
+    val originalQuery = timeRelation
+      .select(
+        DateFormatClass(
+          GetTimestamp(
+            df,
+            pattern,
+            TimestampType),
+          pattern) as "c1")
+      .analyze
+
+    val optimized = Optimize.execute(originalQuery)
+
+    // Should NOT be simplified - optimized plan should equal original (no rewrite)
+    comparePlans(optimized, originalQuery)
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out
index 7c5f1260b6487..b61a35681b0a0 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out
@@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:
 -- !query analysis
 Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 
09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x] +- OneRowRelation + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query analysis +Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x] ++- OneRowRelation + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query analysis +Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out index 7c5f1260b6487..b61a35681b0a0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out @@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: -- !query analysis Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd 
HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x] +- OneRowRelation + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query analysis +Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x] ++- OneRowRelation + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query analysis +Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql index 3618bb5c399f9..aefa8e8dd20f8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql @@ -71,3 +71,7 @@ select col, date_format(col, ''), to_char(col, ''), to_varchar(col, '') from v; select date_format(date'2023-08-18', 'yyyy-MM-dd'), to_char(date'2023-08-18', 'yyyy-MM-dd'), to_varchar(date'2023-08-18', 'yyyy-MM-dd'); select date_format(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_char(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd 
HH:mm:ss.SSSSSSZ'), to_varchar(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_char(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_varchar(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'); + +-- SPARK-57575: TIME type formatting +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss'); +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'); diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out index ba0e9421e5b52..401aff976e9e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out @@ -462,3 +462,19 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: struct -- !query output 2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123 + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query schema +struct +-- !query output +12:13:14 12:13:14 + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query schema +struct +-- !query output +23:59:59.123456 23:59:59.123456 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out index f659dbf0083d0..76a02428056fc 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out @@ -450,3 +450,19 @@ select 
date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm: struct -- !query output 2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456 + + +-- !query +select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss') +-- !query schema +struct +-- !query output +12:13:14 12:13:14 + + +-- !query +select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS') +-- !query schema +struct +-- !query output +23:59:59.123456 23:59:59.123456