diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index e41066256bd4..3ad3ae9cdf12 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -327,6 +327,7 @@ Date and Timestamp Functions
     unix_date
     unix_micros
     unix_millis
+    unix_nanos
     unix_seconds
     unix_timestamp
     weekday
diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index 828ac151cd8c..00183fe283f0 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -3531,6 +3531,13 @@ def unix_millis(col: "ColumnOrName") -> Column:
 unix_millis.__doc__ = pysparkfuncs.unix_millis.__doc__
 
 
+def unix_nanos(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("unix_nanos", col)
+
+
+unix_nanos.__doc__ = pysparkfuncs.unix_nanos.__doc__
+
+
 def unix_seconds(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("unix_seconds", col)
 
diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py
index 6d1f794474eb..914b9c7fbcb7 100644
--- a/python/pyspark/sql/functions/__init__.py
+++ b/python/pyspark/sql/functions/__init__.py
@@ -276,6 +276,7 @@
     "unix_date",
     "unix_micros",
     "unix_millis",
+    "unix_nanos",
     "unix_seconds",
     "unix_timestamp",
     "weekday",
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 43340405193f..148e6d5184cf 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -11792,6 +11792,57 @@ def unix_millis(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("unix_millis", col)
 
 
+@_try_remote_functions
+def unix_nanos(col: "ColumnOrName") -> Column:
+    """Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``.
+    Only supported for ``TIMESTAMP_LTZ(p)`` and ``TIMESTAMP_NTZ(p)`` with precision ``p``
+    in ``[7, 9]``.
+
+    .. versionadded:: 4.3.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or column name
+        input column of nanosecond-precision timestamp values to convert.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the number of nanoseconds since 1970-01-01 00:00:00 UTC as ``DECIMAL(21, 0)``.
+
+    See Also
+    --------
+    :meth:`pyspark.sql.functions.unix_date`
+    :meth:`pyspark.sql.functions.unix_seconds`
+    :meth:`pyspark.sql.functions.unix_millis`
+    :meth:`pyspark.sql.functions.unix_micros`
+
+    Examples
+    --------
+    >>> import pyspark.sql.functions as sf
+    >>> spark.conf.set("spark.sql.timestampNanosTypes.enabled", "true")
+    >>> df = spark.sql(
+    ...     "SELECT TIMESTAMP_NTZ '2020-01-01 13:24:35.123456789' AS ts"
+    ... )
+    >>> df.select('*', sf.unix_nanos('ts')).show(truncate=False)
+    +-----------------------------+-------------------+
+    |ts                           |unix_nanos(ts)     |
+    +-----------------------------+-------------------+
+    |2020-01-01 13:24:35.123456789|1577885075123456789|
+    +-----------------------------+-------------------+
+
+    >>> df.select(sf.unix_nanos(sf.lit(None).cast('timestamp_ntz(9)'))).show()
+    +------------------------------------------+
+    |unix_nanos(CAST(NULL AS TIMESTAMP_NTZ(9)))|
+    +------------------------------------------+
+    |                                      NULL|
+    +------------------------------------------+
+
+    >>> spark.conf.unset("spark.sql.timestampNanosTypes.enabled")
+    """
+    return _invoke_function_over_columns("unix_nanos", col)
+
+
 @_try_remote_functions
 def unix_seconds(col: "ColumnOrName") -> Column:
     """Returns the number of seconds since 1970-01-01 00:00:00 UTC.
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index c9ca0fca96a7..2c8dfc6081b4 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -82,10 +82,9 @@ def test_function_parity(self):
         missing_in_py = jvm_fn_set.difference(py_fn_set)
 
         # Functions that we expect to be missing in python until they are added to pyspark
         expected_missing_in_py = {
-            "unix_nanos",  # SPARK-57527: PySpark support tracked as a follow-up
             "timestamp_nanos",  # SPARK-57526: PySpark support tracked as a follow-up
         }
 
         self.assertEqual(
             expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"