From c5fb0bd983bc56115449b94ffbf12cdb76343f7c Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Fri, 19 Jun 2026 17:39:57 -0700 Subject: [PATCH 1/5] [SPARK-57579][PYTHON] Add PySpark support for unix_nanos function --- .../pyspark/sql/connect/functions/builtin.py | 7 +++ python/pyspark/sql/functions/__init__.py | 1 + python/pyspark/sql/functions/builtin.py | 48 +++++++++++++++++++ python/pyspark/sql/tests/test_functions.py | 4 +- 4 files changed, 57 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 828ac151cd8ce..b7fd5789ba73f 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -3524,6 +3524,13 @@ def unix_micros(col: "ColumnOrName") -> Column: unix_micros.__doc__ = pysparkfuncs.unix_micros.__doc__ +def unix_nanos(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("unix_nanos", col) + + +unix_nanos.__doc__ = pysparkfuncs.unix_nanos.__doc__ + + def unix_millis(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unix_millis", col) diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py index 6d1f794474eb7..914b9c7fbcb79 100644 --- a/python/pyspark/sql/functions/__init__.py +++ b/python/pyspark/sql/functions/__init__.py @@ -276,6 +276,7 @@ "unix_date", "unix_micros", "unix_millis", + "unix_nanos", "unix_seconds", "unix_timestamp", "weekday", diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 43340405193fb..b2795a51c9aed 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -11749,6 +11749,54 @@ def unix_micros(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("unix_micros", col) +@_try_remote_functions +def unix_nanos(col: "ColumnOrName") -> Column: + """Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC 
as ``DECIMAL(21, 0)``. + Only supported for ``TIMESTAMP_LTZ(p)`` and ``TIMESTAMP_NTZ(p)`` with precision ``p`` + in ``[7, 9]``. + + .. versionadded:: 4.3.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + input column of nanosecond-precision timestamp values to convert. + + Returns + ------- + :class:`~pyspark.sql.Column` + the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``. + + See Also + -------- + :meth:`pyspark.sql.functions.unix_date` + :meth:`pyspark.sql.functions.unix_seconds` + :meth:`pyspark.sql.functions.unix_millis` + :meth:`pyspark.sql.functions.unix_micros` + + Examples + -------- + >>> import pyspark.sql.functions as sf + >>> df = spark.sql( + ... "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts" + ... ) + >>> df.select('*', sf.unix_nanos('ts')).show() + +-----------------------------+-------------------+ + | ts| unix_nanos(ts)| + +-----------------------------+-------------------+ + |2020-01-01 13:24:35.123456789|1577884675123456789| + +-----------------------------+-------------------+ + + >>> df.select(sf.unix_nanos(sf.lit(None).cast('timestamp_ntz(9)'))).show() + +------------------------------------------+ + |unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))| + +------------------------------------------+ + | NULL| + +------------------------------------------+ + """ + return _invoke_function_over_columns("unix_nanos", col) + + @_try_remote_functions def unix_millis(col: "ColumnOrName") -> Column: """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. 
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 10aa01e5a6005..8599d0dd46e1c 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -82,9 +82,7 @@ def test_function_parity(self): missing_in_py = jvm_fn_set.difference(py_fn_set) # Functions that we expect to be missing in python until they are added to pyspark - expected_missing_in_py = { - "unix_nanos", # SPARK-57527: PySpark support tracked as a follow-up - } + expected_missing_in_py = set() self.assertEqual( expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected" From 2c526ec4cce13c2134ab0b5a032d45fa52b2fa5e Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Sat, 20 Jun 2026 19:32:39 -0700 Subject: [PATCH 2/5] Fix unix_nanos doctest: correct expected value and use show(truncate=False) --- python/pyspark/sql/functions/builtin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index b2795a51c9aed..95224f68ff7a5 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -11780,11 +11780,11 @@ def unix_nanos(col: "ColumnOrName") -> Column: >>> df = spark.sql( ... "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts" ... 
) - >>> df.select('*', sf.unix_nanos('ts')).show() + >>> df.select('*', sf.unix_nanos('ts')).show(truncate=False) +-----------------------------+-------------------+ - | ts| unix_nanos(ts)| + |ts |unix_nanos(ts) | +-----------------------------+-------------------+ - |2020-01-01 13:24:35.123456789|1577884675123456789| + |2020-01-01 13:24:35.123456789|1577885075123456789| +-----------------------------+-------------------+ >>> df.select(sf.unix_nanos(sf.lit(None).cast('timestamp_ntz(9)'))).show() From 3cfeefe09cd662bf2a490d1327b7b2d46c7094fd Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Sat, 20 Jun 2026 22:20:53 -0700 Subject: [PATCH 3/5] Swap unix_millis/unix_nanos definition order Reorder the unix_millis and unix_nanos definitions in python/pyspark/sql/functions/builtin.py and the connect variant so that unix_nanos follows unix_millis; the implementations and docstrings move with their functions, and each function continues to invoke its own underlying name. Add unix_nanos to the functions index in docs (functions.rst), and wrap the unix_nanos doc examples in set/unset of the spark.sql.timestampNanosTypes.enabled feature flag so the nanosecond-precision timestamp literal is accepted. 
--- .../reference/pyspark.sql/functions.rst | 1 + .../pyspark/sql/connect/functions/builtin.py | 12 +-- python/pyspark/sql/functions/builtin.py | 89 ++++++++++--------- 3 files changed, 53 insertions(+), 49 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index e41066256bd47..3ad3ae9cdf127 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -327,6 +327,7 @@ Date and Timestamp Functions unix_date unix_micros unix_millis + unix_nanos unix_seconds unix_timestamp weekday diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index b7fd5789ba73f..00183fe283f0a 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -3524,18 +3524,18 @@ def unix_micros(col: "ColumnOrName") -> Column: unix_micros.__doc__ = pysparkfuncs.unix_micros.__doc__ -def unix_nanos(col: "ColumnOrName") -> Column: - return _invoke_function_over_columns("unix_nanos", col) +def unix_millis(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("unix_millis", col) -unix_nanos.__doc__ = pysparkfuncs.unix_nanos.__doc__ +unix_millis.__doc__ = pysparkfuncs.unix_millis.__doc__ -def unix_millis(col: "ColumnOrName") -> Column: - return _invoke_function_over_columns("unix_millis", col) +def unix_nanos(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("unix_nanos", col) -unix_millis.__doc__ = pysparkfuncs.unix_millis.__doc__ +unix_nanos.__doc__ = pysparkfuncs.unix_nanos.__doc__ def unix_seconds(col: "ColumnOrName") -> Column: diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 95224f68ff7a5..29d0626b9d2c8 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -11750,94 +11750,97 @@ def 
unix_micros(col: "ColumnOrName") -> Column: @_try_remote_functions -def unix_nanos(col: "ColumnOrName") -> Column: - """Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``. - Only supported for ``TIMESTAMP_LTZ(p)`` and ``TIMESTAMP_NTZ(p)`` with precision ``p`` - in ``[7, 9]``. +def unix_millis(col: "ColumnOrName") -> Column: + """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. + Truncates higher levels of precision. - .. versionadded:: 4.3.0 + .. versionadded:: 3.5.0 Parameters ---------- col : :class:`~pyspark.sql.Column` or column name - input column of nanosecond-precision timestamp values to convert. + input column of values to convert. Returns ------- :class:`~pyspark.sql.Column` - the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``. + the number of milliseconds since 1970-01-01 00:00:00 UTC. See Also -------- :meth:`pyspark.sql.functions.unix_date` :meth:`pyspark.sql.functions.unix_seconds` - :meth:`pyspark.sql.functions.unix_millis` :meth:`pyspark.sql.functions.unix_micros` + :meth:`pyspark.sql.functions.timestamp_millis` Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + >>> import pyspark.sql.functions as sf - >>> df = spark.sql( - ... "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts" - ... 
) - >>> df.select('*', sf.unix_nanos('ts')).show(truncate=False) - +-----------------------------+-------------------+ - |ts |unix_nanos(ts) | - +-----------------------------+-------------------+ - |2020-01-01 13:24:35.123456789|1577885075123456789| - +-----------------------------+-------------------+ + >>> df = spark.createDataFrame([('2015-07-22 10:00:00',), ('2022-10-09 11:12:13',)], ['ts']) + >>> df.select('*', sf.unix_millis(sf.to_timestamp('ts'))).show() + +-------------------+-----------------------------+ + | ts|unix_millis(to_timestamp(ts))| + +-------------------+-----------------------------+ + |2015-07-22 10:00:00| 1437584400000| + |2022-10-09 11:12:13| 1665339133000| + +-------------------+-----------------------------+ - >>> df.select(sf.unix_nanos(sf.lit(None).cast('timestamp_ntz(9)'))).show() - +------------------------------------------+ - |unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))| - +------------------------------------------+ - | NULL| - +------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ - return _invoke_function_over_columns("unix_nanos", col) + return _invoke_function_over_columns("unix_millis", col) @_try_remote_functions -def unix_millis(col: "ColumnOrName") -> Column: - """Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. - Truncates higher levels of precision. +def unix_nanos(col: "ColumnOrName") -> Column: + """Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``. + Only supported for ``TIMESTAMP_LTZ(p)`` and ``TIMESTAMP_NTZ(p)`` with precision ``p`` + in ``[7, 9]``. - .. versionadded:: 3.5.0 + .. versionadded:: 4.3.0 Parameters ---------- col : :class:`~pyspark.sql.Column` or column name - input column of values to convert. + input column of nanosecond-precision timestamp values to convert. Returns ------- :class:`~pyspark.sql.Column` - the number of milliseconds since 1970-01-01 00:00:00 UTC. 
+ the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``. See Also -------- :meth:`pyspark.sql.functions.unix_date` :meth:`pyspark.sql.functions.unix_seconds` + :meth:`pyspark.sql.functions.unix_millis` :meth:`pyspark.sql.functions.unix_micros` - :meth:`pyspark.sql.functions.timestamp_millis` Examples -------- - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([('2015-07-22 10:00:00',), ('2022-10-09 11:12:13',)], ['ts']) - >>> df.select('*', sf.unix_millis(sf.to_timestamp('ts'))).show() - +-------------------+-----------------------------+ - | ts|unix_millis(to_timestamp(ts))| - +-------------------+-----------------------------+ - |2015-07-22 10:00:00| 1437584400000| - |2022-10-09 11:12:13| 1665339133000| - +-------------------+-----------------------------+ + >>> spark.conf.set("spark.sql.timestampNanosTypes.enabled", True) + >>> df = spark.sql( + ... "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts" + ... 
) + >>> df.select('*', sf.unix_nanos('ts')).show(truncate=False) + +-----------------------------+-------------------+ + |ts |unix_nanos(ts) | + +-----------------------------+-------------------+ + |2020-01-01 13:24:35.123456789|1577885075123456789| + +-----------------------------+-------------------+ - >>> spark.conf.unset("spark.sql.session.timeZone") + >>> df.select(sf.unix_nanos(sf.lit(None).cast('timestamp_ntz(9)'))).show() + +------------------------------------------+ + |unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))| + +------------------------------------------+ + | NULL| + +------------------------------------------+ + + >>> spark.conf.unset("spark.sql.timestampNanosTypes.enabled") """ - return _invoke_function_over_columns("unix_millis", col) + return _invoke_function_over_columns("unix_nanos", col) @_try_remote_functions From e6c8b45187276a6e201f9e4f5d5c98d14ee91b7d Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Mon, 22 Jun 2026 22:17:42 -0700 Subject: [PATCH 4/5] Fixed doctest bool value --- python/pyspark/sql/functions/builtin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 29d0626b9d2c8..148e6d5184cfb 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -11820,7 +11820,7 @@ def unix_nanos(col: "ColumnOrName") -> Column: Examples -------- >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.timestampNanosTypes.enabled", True) + >>> spark.conf.set("spark.sql.timestampNanosTypes.enabled", "true") >>> df = spark.sql( ... "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts" ... 
) From 50ad47d24b9b1dbfcc28125f367d49e523c4dafd Mon Sep 17 00:00:00 2001 From: Jubin Soni Date: Tue, 23 Jun 2026 10:33:43 -0700 Subject: [PATCH 5/5] Updated linting --- python/pyspark/sql/tests/test_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 2c8dfc6081b40..16928193db6da 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -82,7 +82,9 @@ def test_function_parity(self): missing_in_py = jvm_fn_set.difference(py_fn_set) # Functions that we expect to be missing in python until they are added to pyspark - expected_missing_in_py = {"timestamp_nanos"} # SPARK-57526: PySpark support tracked as a follow-up + expected_missing_in_py = { + "timestamp_nanos" + } # SPARK-57526: PySpark support tracked as a follow-up self.assertEqual( expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"