Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
import org.apache.spark.sql.catalyst.trees.TreePattern._
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimeFormatter, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT
Expand Down Expand Up @@ -1139,34 +1139,76 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
def this(left: Expression, right: Expression) = this(left, right, None)

override def inputTypes: Seq[AbstractDataType] =
Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true))
Seq(TypeCollection(TimestampType, AnyTimeType),
StringTypeWithCollation(supportsTrimCollation = true))

override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))

override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
val formatter = formatterOption.getOrElse(getFormatter(format.toString))
UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
left.dataType match {
case _: TimeType =>
val tf = timeFormatterOption.getOrElse(
TimeFormatter(format.toString, isParsing = false))
DateFormatClass.formatTimeWithError(
tf, timestamp.asInstanceOf[Long], "to_char", format.toString)
case _ =>
val formatter = formatterOption.getOrElse(getFormatter(format.toString))
UTF8String.fromString(formatter.format(timestamp.asInstanceOf[Long]))
}
}

/**
 * Formatter created lazily, at most once, when the input expression is of TIME type and the
 * pattern expression is foldable. It is `None` for any other input type, for a non-foldable
 * pattern, or when the folded pattern evaluates to null.
 */
@transient private lazy val timeFormatterOption: Option[TimeFormatter] =
  left.dataType match {
    case _: TimeType if right.foldable =>
      Option(right.eval()).map(pattern => TimeFormatter(pattern.toString, isParsing = false))
    case _ => None
  }

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
formatterOption.map { tf =>
val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf)
defineCodeGen(ctx, ev, (timestamp, _) => {
s"""UTF8String.fromString($timestampFormatter.format($timestamp))"""
})
}.getOrElse {
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
defineCodeGen(ctx, ev, (timestamp, format) => {
s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
| $format.toString(),
| $zid,
| $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
| false)
|.format($timestamp))""".stripMargin
})
left.dataType match {
case _: TimeType =>
val errClass = QueryExecutionErrors.getClass.getName.stripSuffix("$")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

errClass is never used — the error wrapping now lives inside formatTimeWithError, so this is leftover. Remove:

Suggested change
val errClass = QueryExecutionErrors.getClass.getName.stripSuffix("$")

timeFormatterOption.map { tf =>
val timeFormatter = ctx.addReferenceObj("timeFormatter", tf)
val funcName = ctx.addReferenceObj("funcName", "to_char")
val fmtStr = ctx.addReferenceObj("fmtStr", right.eval().toString)
defineCodeGen(ctx, ev, (time, _) => {
s"""|((org.apache.spark.unsafe.types.UTF8String)
|org.apache.spark.sql.catalyst.expressions
|.DateFormatClass.formatTimeWithError(
|$timeFormatter, $time,
|$funcName, $fmtStr))""".stripMargin.replaceAll("\n", "")
})
}.getOrElse {
val tf = TimeFormatter.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, (time, format) => {
s"""|((org.apache.spark.unsafe.types.UTF8String)
|org.apache.spark.sql.catalyst.expressions
|.DateFormatClass.formatTimeWithError(
|$tf$$.MODULE$$.apply(
|$format.toString(), false),
|$time, "to_char",
|$format.toString()))""".stripMargin.replaceAll("\n", "")
})
}
case _ =>
formatterOption.map { tf =>
val timestampFormatter = ctx.addReferenceObj("timestampFormatter", tf)
defineCodeGen(ctx, ev, (timestamp, _) => {
s"""UTF8String.fromString($timestampFormatter.format($timestamp))"""
})
}.getOrElse {
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
defineCodeGen(ctx, ev, (timestamp, format) => {
s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
| $format.toString(),
| $zid,
| $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
| false)
|.format($timestamp))""".stripMargin
})
}
}
}

Expand All @@ -1183,6 +1225,19 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
final override def nodePatternsInternal(): Seq[TreePattern] = Seq(DATETIME)
}

object DateFormatClass {
/** Helper for codegen: formats time with proper Spark error on invalid pattern. */

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helper is called from both nullSafeEval (interpreted) and doGenCode, not codegen only:

Suggested change
/** Helper for codegen: formats time with proper Spark error on invalid pattern. */
/** Formats a TIME value, mapping an invalid pattern to a Spark error. Used by both eval and codegen. */

def formatTimeWithError(
tf: TimeFormatter, nanos: Long, funcName: String, pattern: String): UTF8String = {
try {
UTF8String.fromString(tf.format(nanos))
} catch {
case e: java.time.DateTimeException =>
throw QueryExecutionErrors.invalidPatternError(funcName, pattern, e)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following up on my own earlier suggestion to reuse this helper — having now seen the rendered message, it's misleading for these functions. invalidPatternError hardcodes parameter -> toSQLId("regexp") (it was written for regexp functions), so this produces The value of parameter(s) ``regexp`` in ``to_char`` is invalid: '<pattern>' — but to_char/to_varchar/date_format have no regexp parameter. And funcName is the literal "to_char" here and at the two codegen sites, yet DateFormatClass also backs date_format and to_varchar, so those are misreported.

Minimal fix is to pass prettyName instead of the literal. Fully dropping the regexp parameter would need a datetime-specific message rather than the regexp-oriented helper — your call whether that's worth it here, but the message is what users will see.

}
}
}

/**
* Converts time string with given pattern.
* Deterministic version of [[UnixTimestamp]], must have at least one parameter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import scala.language.postfixOps
import scala.reflect.ClassTag
import scala.util.Random

import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkUpgradeException}
import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkIllegalArgumentException, SparkRuntimeException, SparkUpgradeException}
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
Expand Down Expand Up @@ -328,6 +328,38 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}

test("SPARK-57575: DateFormat with TimeType (to_char/to_varchar)") {
  // TIME literals below are built from a second count converted to nanoseconds.
  val nanosPerSecond = 1000000000L

  // 12:13:14 at the default TIME precision.
  val defaultPrecisionTime = Literal.create(
    (12L * 3600 + 13 * 60 + 14) * nanosPerSecond, TimeType(TimeType.DEFAULT_PRECISION))

  // Each pattern is formatted against the same 12:13:14 literal.
  Seq(
    "HH:mm:ss" -> "12:13:14",
    "HH" -> "12",
    "mm" -> "13",
    "ss" -> "14"
  ).foreach { case (pattern, expected) =>
    checkEvaluation(DateFormatClass(defaultPrecisionTime, Literal(pattern), UTC_OPT), expected)
  }

  // A non-default precision, TIME(0), must format the same way: 09:30:00.
  val zeroPrecisionTime = Literal.create(
    (9L * 3600 + 30 * 60 + 0) * nanosPerSecond, TimeType(0))
  checkEvaluation(
    DateFormatClass(zeroPrecisionTime, Literal("HH:mm:ss"), UTC_OPT), "09:30:00")

  // A null TIME input yields a null result.
  val nullTime = Literal.create(null, TimeType(TimeType.DEFAULT_PRECISION))
  checkEvaluation(DateFormatClass(nullTime, Literal("HH:mm:ss"), UTC_OPT), null)

  // Date-only pattern fields are rejected for TIME input and surface as a Spark error.
  val expectedParameters = Map(
    "parameter" -> "`regexp`",
    "functionName" -> "`to_char`",
    "value" -> "'yyyy-MM-dd'")
  checkErrorInExpression[SparkRuntimeException](
    DateFormatClass(defaultPrecisionTime, Literal("yyyy-MM-dd"), UTC_OPT),
    condition = "INVALID_PARAMETER_VALUE.PATTERN",
    parameters = expectedParameters
  )
}

test("Hour") {
assert(Hour(Literal.create(null, DateType), UTC_OPT).resolved === false)
assert(Hour(Literal(ts), UTC_OPT).resolved)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:
-- !query analysis
Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x]
+- OneRowRelation


-- !query
select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss')
-- !query analysis
Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x]
+- OneRowRelation


-- !query
select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS')
-- !query analysis
Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,17 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:
-- !query analysis
Project [date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x, date_format(cast(2023-08-18 09:13:14.123456 as timestamp), yyyy-MM-dd HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS)#x]
+- OneRowRelation


-- !query
select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss')
-- !query analysis
Project [date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x, date_format(12:13:14, HH:mm:ss, Some(America/Los_Angeles)) AS date_format(TIME '12:13:14', HH:mm:ss)#x]
+- OneRowRelation


-- !query
select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS')
-- !query analysis
Project [date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x, date_format(23:59:59.123456, HH:mm:ss.SSSSSS, Some(America/Los_Angeles)) AS date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS)#x]
+- OneRowRelation
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ select col, date_format(col, ''), to_char(col, ''), to_varchar(col, '') from v;
select date_format(date'2023-08-18', 'yyyy-MM-dd'), to_char(date'2023-08-18', 'yyyy-MM-dd'), to_varchar(date'2023-08-18', 'yyyy-MM-dd');
select date_format(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_char(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'), to_varchar(timestamp_ltz'2023-08-18 09:13:14.123456Z', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_char(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'), to_varchar(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS');

-- SPARK-57575: TIME type formatting
select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss');
select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS');
Original file line number Diff line number Diff line change
Expand Up @@ -462,3 +462,19 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:
struct<date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string,date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string,date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string>
-- !query output
2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123 2023-08-18 09:13:14.000123


-- !query
select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss')
-- !query schema
struct<date_format(TIME '12:13:14', HH:mm:ss):string,date_format(TIME '12:13:14', HH:mm:ss):string>
-- !query output
12:13:14 12:13:14


-- !query
select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS')
-- !query schema
struct<date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS):string,date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS):string>
-- !query output
23:59:59.123456 23:59:59.123456
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,19 @@ select date_format(timestamp_ntz'2023-08-18 09:13:14.123456', 'yyyy-MM-dd HH:mm:
struct<date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string,date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string,date_format(TIMESTAMP_NTZ '2023-08-18 09:13:14.123456', yyyy-MM-dd HH:mm:ss.SSSSSS):string>
-- !query output
2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456 2023-08-18 09:13:14.123456


-- !query
select to_char(TIME'12:13:14', 'HH:mm:ss'), to_varchar(TIME'12:13:14', 'HH:mm:ss')
-- !query schema
struct<date_format(TIME '12:13:14', HH:mm:ss):string,date_format(TIME '12:13:14', HH:mm:ss):string>
-- !query output
12:13:14 12:13:14


-- !query
select to_char(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS'), to_varchar(TIME'23:59:59.123456', 'HH:mm:ss.SSSSSS')
-- !query schema
struct<date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS):string,date_format(TIME '23:59:59.123456', HH:mm:ss.SSSSSS):string>
-- !query output
23:59:59.123456 23:59:59.123456