From 9c10a1dd4c4d19d01689f3b282699fa8d858cc8d Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Fri, 12 Dec 2025 16:09:12 +0000 Subject: [PATCH 1/8] Make example files threadsafe --- doc/getting-started-guide/quick-overview.rst | 25 ++++-- doc/internals/time-coding.rst | 83 ++++++++++++++------ doc/user-guide/complex-numbers.rst | 63 +++++++++++---- doc/user-guide/dask.rst | 7 +- doc/user-guide/io.rst | 41 ++++++++-- doc/user-guide/weather-climate.rst | 25 ++++-- 6 files changed, 187 insertions(+), 57 deletions(-) diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index 422b5217ab1..409a57e1c68 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -213,17 +213,32 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. jupyter-execute:: - ds.to_netcdf("example.nc") - reopened = xr.open_dataset("example.nc") - reopened + filename = "example.nc" .. jupyter-execute:: :hide-code: - import os + # Ensure the file is located in a unique temporary directory + # so that it doesn't conflict with parallel builds of the + # documentation. + + import tempfile + import os.path + + tempdir = tempfile.TemporaryDirectory() + filename = os.path.join(tempdir.name, filename) + +.. jupyter-execute:: + + ds.to_netcdf(filename) + reopened = xr.open_dataset(filename) + reopened + +.. jupyter-execute:: + :hide-code: reopened.close() - os.remove("example.nc") + tempdir.cleanup() It is common for datasets to be distributed across multiple files (commonly one file per timestep). Xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst index 6ccf40855a7..a510ccbfc17 100644 --- a/doc/internals/time-coding.rst +++ b/doc/internals/time-coding.rst @@ -459,59 +459,94 @@ Default Time Unit The current default time unit of xarray is ``'ns'``. When setting keyword argument ``time_unit`` unit to ``'s'`` (the lowest resolution pandas allows) datetimes will be converted to at least ``'s'``-resolution, if possible. The same holds true for ``'ms'`` and ``'us'``. +.. jupyter-execute:: + + datetimes1_filename = "test-datetimes1.nc" + +.. jupyter-execute:: + :hide-code: + + # Ensure the file is located in a unique temporary directory + # so that it doesn't conflict with parallel builds of the + # documentation. + + import tempfile + import os.path + + tempdir = tempfile.TemporaryDirectory() + datetimes1_filename = os.path.join(tempdir.name, datetimes1_filename) + .. jupyter-execute:: attrs = {"units": "hours since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) - ds.to_netcdf("test-datetimes1.nc") + ds.to_netcdf(datetimes1_filename) .. jupyter-execute:: - xr.open_dataset("test-datetimes1.nc") + xr.open_dataset(datetimes1_filename) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-datetimes1.nc", decode_times=coder) + xr.open_dataset(datetimes1_filename, decode_times=coder) If a coarser unit is requested the datetimes are decoded into their native on-disk resolution, if possible. +.. jupyter-execute:: + + datetimes2_filename = "test-datetimes2.nc" + +.. jupyter-execute:: + :hide-code: + + datetimes2_filename = os.path.join(tempdir.name, datetimes2_filename) + .. 
jupyter-execute:: attrs = {"units": "milliseconds since 2000-01-01"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) - ds.to_netcdf("test-datetimes2.nc") + ds.to_netcdf(datetimes2_filename) .. jupyter-execute:: - xr.open_dataset("test-datetimes2.nc") + xr.open_dataset(datetimes2_filename) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-datetimes2.nc", decode_times=coder) + xr.open_dataset(datetimes2_filename, decode_times=coder) Similar logic applies for decoding timedelta values. The default resolution is ``"ns"``: +.. jupyter-execute:: + + timedeltas1_filename = "test-timedeltas1.nc" + +.. jupyter-execute:: + :hide-code: + + timedeltas1_filename = os.path.join(tempdir.name, timedeltas1_filename) + .. jupyter-execute:: attrs = {"units": "hours"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) - ds.to_netcdf("test-timedeltas1.nc") + ds.to_netcdf(timedeltas1_filename) .. jupyter-execute:: :stderr: - xr.open_dataset("test-timedeltas1.nc") + xr.open_dataset(timedeltas1_filename) By default, timedeltas will be decoded to the same resolution as datetimes: .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-timedeltas1.nc", decode_times=coder, decode_timedelta=True) + xr.open_dataset(timedeltas1_filename, decode_times=coder, decode_timedelta=True) but if one would like to decode timedeltas to a different resolution, one can provide a coder specifically for timedeltas to ``decode_timedelta``: @@ -520,32 +555,41 @@ provide a coder specifically for timedeltas to ``decode_timedelta``: timedelta_coder = xr.coders.CFTimedeltaCoder(time_unit="ms") xr.open_dataset( - "test-timedeltas1.nc", decode_times=coder, decode_timedelta=timedelta_coder + timedeltas1_filename, decode_times=coder, decode_timedelta=timedelta_coder ) As with datetimes, if a coarser unit is requested the timedeltas are decoded into their native on-disk resolution, if possible: +.. jupyter-execute:: + + timedeltas2_filename = "test-timedeltas2.nc" + +.. jupyter-execute:: + :hide-code: + + timedeltas2_filename = os.path.join(tempdir.name, timedeltas2_filename) + .. jupyter-execute:: attrs = {"units": "milliseconds"} ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) - ds.to_netcdf("test-timedeltas2.nc") + ds.to_netcdf(timedeltas2_filename) .. jupyter-execute:: - xr.open_dataset("test-timedeltas2.nc", decode_timedelta=True) + xr.open_dataset(timedeltas2_filename, decode_timedelta=True) .. jupyter-execute:: coder = xr.coders.CFDatetimeCoder(time_unit="s") - xr.open_dataset("test-timedeltas2.nc", decode_times=coder, decode_timedelta=True) + xr.open_dataset(timedeltas2_filename, decode_times=coder, decode_timedelta=True) To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 `_) pass ``False`` to ``decode_timedelta``: .. jupyter-execute:: - xr.open_dataset("test-timedeltas2.nc", decode_timedelta=False) + xr.open_dataset(timedeltas2_filename, decode_timedelta=False) .. 
note:: Note that in the future the default value of ``decode_timedelta`` will be @@ -557,13 +601,4 @@ To opt-out of timedelta decoding (see issue `Undesired decoding to timedelta64 < :hide-code: # Cleanup - import os - - for f in [ - "test-datetimes1.nc", - "test-datetimes2.nc", - "test-timedeltas1.nc", - "test-timedeltas2.nc", - ]: - if os.path.exists(f): - os.remove(f) + tempdir.cleanup() diff --git a/doc/user-guide/complex-numbers.rst b/doc/user-guide/complex-numbers.rst index ea9df880142..208ac6bc4fc 100644 --- a/doc/user-guide/complex-numbers.rst +++ b/doc/user-guide/complex-numbers.rst @@ -6,10 +6,22 @@ Complex Numbers =============== .. jupyter-execute:: - :hide-code: + :hide-code: + + import numpy as np + import xarray as xr + +.. jupyter-execute:: + :hide-code: + + # Ensure the file is located in a unique temporary directory + # so that it doesn't conflict with parallel builds of the + # documentation. + + import tempfile + import os.path - import numpy as np - import xarray as xr + tempdir = tempfile.TemporaryDirectory() Xarray leverages NumPy to seamlessly handle complex numbers in :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects. @@ -52,12 +64,21 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py This requires the `h5netcdf `_ library to be installed. + .. jupyter-execute:: + + complex_nums_h5_filename = "complex_nums_h5.nc" + + .. jupyter-execute:: + :hide-code: + + complex_nums_h5_filename = os.path.join(tempdir.name, complex_nums_h5_filename) + .. jupyter-execute:: # write the data to disk - da.to_netcdf("complex_nums_h5.nc", engine="h5netcdf") + da.to_netcdf(complex_nums_h5_filename, engine="h5netcdf") # read the file back into memory - ds_h5 = xr.open_dataset("complex_nums_h5.nc", engine="h5netcdf") + ds_h5 = xr.open_dataset(complex_nums_h5_filename, engine="h5netcdf") # check the dtype ds_h5[da.name].dtype @@ -66,13 +87,22 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py Requires the `netcdf4-python (>= 1.7.1) `_ library and you have to enable ``auto_complex=True``. + .. jupyter-execute:: + + complex_nums_nc4_filename = "complex_nums_nc4.nc" + + .. jupyter-execute:: + :hide-code: + + complex_nums_nc4_filename = os.path.join(tempdir.name, complex_nums_nc4_filename) + .. jupyter-execute:: # write the data to disk - da.to_netcdf("complex_nums_nc4.nc", engine="netcdf4", auto_complex=True) + da.to_netcdf(complex_nums_nc4_filename, engine="netcdf4", auto_complex=True) # read the file back into memory ds_nc4 = xr.open_dataset( - "complex_nums_nc4.nc", engine="netcdf4", auto_complex=True + complex_nums_nc4_filename, engine="netcdf4", auto_complex=True ) # check the dtype ds_nc4[da.name].dtype @@ -88,6 +118,15 @@ Alternative: Manual Handling If direct writing is not supported (e.g., targeting NetCDF3), you can manually split the complex array into separate real and imaginary variables before saving: +.. jupyter-execute:: + + complex_manual_filename = "complex_manual.nc" + +.. jupyter-execute:: + :hide-code: + + complex_manual_filename = os.path.join(tempdir.name, complex_manual_filename) + .. 
jupyter-execute:: # Write data to file @@ -97,10 +136,10 @@ split the complex array into separate real and imaginary variables before saving f"{da.name}_imag": da.imag, } ) - ds_manual.to_netcdf("complex_manual.nc", engine="scipy") # Example + ds_manual.to_netcdf(complex_manual_filename, engine="scipy") # Example # Read data from file - ds = xr.open_dataset("complex_manual.nc", engine="scipy") + ds = xr.open_dataset(complex_manual_filename, engine="scipy") reconstructed = ds[f"{da.name}_real"] + 1j * ds[f"{da.name}_imag"] Recommendations @@ -114,11 +153,7 @@ Recommendations :hide-code: # Cleanup - import os - - for f in ["complex_nums_nc4.nc", "complex_nums_h5.nc", "complex_manual.nc"]: - if os.path.exists(f): - os.remove(f) + tempdir.cleanup() diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 21fb7157355..57f4f92e7f8 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -17,6 +17,9 @@ Parallel Computing with Dask :hide-code: import os + import tempfile + + tempdir = tempfile.TemporaryDirectory() np.random.seed(123456) @@ -35,7 +38,7 @@ Parallel Computing with Dask "latitude": np.arange(89.5, -90.5, -1), } ) - ds.to_netcdf("example-data.nc") + ds.to_netcdf(os.path.join(tempdir.name, "example-data.nc")) Xarray integrates with `Dask `__, a general purpose library for parallel computing, to handle larger-than-memory computations. @@ -444,7 +447,7 @@ Notice that the 0-shaped sizes were not printed to screen. Since ``template`` ha :hide-code: ds.close() # Closes "example-data.nc". - os.remove("example-data.nc") + tempdir.cleanup() .. tip:: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index fdabcac9edb..03936f85db3 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -322,6 +322,23 @@ __ https://github.com/Unidata/netcdf4-python We can save a Dataset to disk using the :py:meth:`Dataset.to_netcdf` method: +.. jupyter-execute:: + + nc_filename = "saved_on_disk.nc" + +.. jupyter-execute:: + :hide-code: + + # Ensure the file is located in a unique temporary directory + # so that it doesn't conflict with parallel builds of the + # documentation. + + import tempfile + import os.path + + tempdir = tempfile.TemporaryDirectory() + nc_filename = os.path.join(tempdir.name, nc_filename) + .. jupyter-execute:: ds = xr.Dataset( @@ -333,7 +350,7 @@ We can save a Dataset to disk using the }, ) - ds.to_netcdf("saved_on_disk.nc") + ds.to_netcdf(nc_filename) By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -352,7 +369,7 @@ We can load netCDF files to create a new Dataset using .. jupyter-execute:: - ds_disk = xr.open_dataset("saved_on_disk.nc") + ds_disk = xr.open_dataset(nc_filename) ds_disk .. jupyter-execute:: @@ -409,7 +426,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. jupyter-execute:: # this automatically closes the dataset after use - with xr.open_dataset("saved_on_disk.nc") as ds: + with xr.open_dataset(nc_filename) as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -794,6 +811,18 @@ with ``conda install h5netcdf``. Once installed we can use xarray to open HDF5 f The similarities between HDF5 and netCDF4 mean that HDF5 data can be written with the same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data: +.. jupyter-execute:: + + h5_filename = "saved_on_disk.h5" + +.. 
jupyter-execute::
+    :hide-code:
+
+    # Ensure the file is located in a unique temporary directory
+    # so that it doesn't conflict with parallel builds of the
+    # documentation.
+    h5_filename = os.path.join(tempdir.name, h5_filename)
+
 .. jupyter-execute::
 
     ds = xr.Dataset(
@@ -805,7 +834,7 @@ same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data:
         },
     )
 
-    ds.to_netcdf("saved_on_disk.h5")
+    ds.to_netcdf(h5_filename)
 
 Groups
 ~~~~~~
@@ -1590,9 +1619,7 @@ To export just the dataset schema without the data itself, use the
     # `ds` to close the file.
     del ds
 
-    for f in ["saved_on_disk.nc", "saved_on_disk.h5"]:
-        if os.path.exists(f):
-            os.remove(f)
+    tempdir.cleanup()
 
 This can be useful for generating indices of dataset contents to expose to
 search indices or other automated data discovery tools.
diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst
index e1e855375e3..6e3bf3af155 100644
--- a/doc/user-guide/weather-climate.rst
+++ b/doc/user-guide/weather-climate.rst
@@ -258,17 +258,32 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:
 
 .. jupyter-execute::
 
-    da.to_netcdf("example-no-leap.nc")
-    reopened = xr.open_dataset("example-no-leap.nc")
-    reopened
+    filename = "example-no-leap.nc"
 
 .. jupyter-execute::
     :hide-code:
 
-    import os
+    # Ensure the file is located in a unique temporary directory
+    # so that it doesn't conflict with parallel builds of the 
+    # documentation.
+
+    import tempfile
+    import os.path
+
+    tempdir = tempfile.TemporaryDirectory()
+    filename = os.path.join(tempdir.name, filename)
+
+.. jupyter-execute::
+
+    da.to_netcdf(filename)
+    reopened = xr.open_dataset(filename)
+    reopened
+
+.. jupyter-execute::
+    :hide-code:
 
     reopened.close()
-    os.remove("example-no-leap.nc")
+    tempdir.cleanup()
 
 - And resampling along the time dimension for data indexed by a
   :py:class:`~xarray.CFTimeIndex`:

From 766dfb9aae6d1093316d2a9b4dc87e929c48f3c8 Mon Sep 17 00:00:00 2001
From: Jacob Tomlinson
Date: Fri, 12 Dec 2025 16:11:28 +0000
Subject: [PATCH 2/8] Remove sphinx-llm sequential build option

---
 doc/conf.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 40c64bd0e74..0e5b2906fa5 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -178,11 +178,6 @@
 # mermaid config
 mermaid_version = "11.6.0"
 
-# sphinx-llm config
-# Some jupyter-execute cells are not thread-safe, so we need to build sequentially.
-# See https://github.com/pydata/xarray/pull/11003#issuecomment-3641648868
-llms_txt_build_parallel = False
-
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]

From cc83e233e49e14c3d43dddedcfe74e57e0e404fe Mon Sep 17 00:00:00 2001
From: Jacob Tomlinson
Date: Fri, 12 Dec 2025 16:56:40 +0000
Subject: [PATCH 3/8] Remove more filename collisions

---
 doc/internals/zarr-encoding-spec.rst | 55 ++++++++++++++++++----------
 doc/user-guide/io.rst                | 54 +++++++++++++--------------
 2 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst
index c34c2f21ddd..0b5cd79fb1d 100644
--- a/doc/internals/zarr-encoding-spec.rst
+++ b/doc/internals/zarr-encoding-spec.rst
@@ -90,6 +90,18 @@ with zarr-python.
 
 **Example 1: Zarr V2 Format**
 
+.. jupyter-execute::
+
+    zarr_v2_filename = "example_v2.zarr"
+
+.. 
jupyter-execute:: + :hide-code: + + import tempfile + import os.path + tempdir = tempfile.TemporaryDirectory() + zarr_v2_filename = os.path.join(tempdir.name, zarr_v2_filename) + .. jupyter-execute:: import os @@ -98,30 +110,33 @@ with zarr-python. # Load tutorial dataset and write as Zarr V2 ds = xr.tutorial.load_dataset("rasm") - ds.to_zarr("rasm_v2.zarr", mode="w", consolidated=False, zarr_format=2) + ds.to_zarr(zarr_v2_filename, mode="w", consolidated=False, zarr_format=2) # Open with zarr-python and examine attributes - zgroup = zarr.open("rasm_v2.zarr") + zgroup = zarr.open(zarr_v2_filename) print("Zarr V2 - Tair attributes:") tair_attrs = dict(zgroup["Tair"].attrs) for key, value in tair_attrs.items(): print(f" '{key}': {repr(value)}") +**Example 2: Zarr V3 Format** + .. jupyter-execute:: - :hide-code: - import shutil - shutil.rmtree("rasm_v2.zarr") + zarr_v3_filename = "example_v3.zarr" -**Example 2: Zarr V3 Format** +.. jupyter-execute:: + :hide-code: + + zarr_v3_filename = os.path.join(tempdir.name, zarr_v3_filename) .. jupyter-execute:: # Write the same dataset as Zarr V3 - ds.to_zarr("rasm_v3.zarr", mode="w", consolidated=False, zarr_format=3) + ds.to_zarr(zarr_v3_filename, mode="w", consolidated=False, zarr_format=3) # Open with zarr-python and examine attributes - zgroup = zarr.open("rasm_v3.zarr") + zgroup = zarr.open(zarr_v3_filename) print("Zarr V3 - Tair attributes:") tair_attrs = dict(zgroup["Tair"].attrs) for key, value in tair_attrs.items(): @@ -131,12 +146,6 @@ with zarr-python. tair_array = zgroup["Tair"] print(f"\nZarr V3 - dimension_names in metadata: {tair_array.metadata.dimension_names}") -.. jupyter-execute:: - :hide-code: - - import shutil - shutil.rmtree("rasm_v3.zarr") - Chunk Key Encoding ------------------ @@ -148,6 +157,16 @@ dimension separator in chunk keys. For example, to specify a custom separator for chunk keys: + +.. jupyter-execute:: + + example_filename = "example.zarr" + +.. jupyter-execute:: + :hide-code: + + example_filename = os.path.join(tempdir.name, example_filename) + .. jupyter-execute:: import xarray as xr @@ -161,7 +180,7 @@ For example, to specify a custom separator for chunk keys: arr = np.ones((42, 100)) ds = xr.DataArray(arr, name="var1").to_dataset() ds.to_zarr( - "example.zarr", + example_filename, zarr_format=2, mode="w", encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}}, @@ -179,8 +198,6 @@ when working with tools that expect a particular chunk key format. chunk key encoding based on the store's format and configuration. .. jupyter-execute:: - :hide-code: - - import shutil + :hide-code: - shutil.rmtree("example.zarr") + tempdir.cleanup() diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 03936f85db3..4076d0a9032 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -330,7 +330,7 @@ We can save a Dataset to disk using the :hide-code: # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the + # so that it doesn't conflict with parallel builds of the # documentation. import tempfile @@ -815,14 +815,6 @@ same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data: h5_filename = "saved_on_disk.h5" -.. jupyter-execute:: - :hide-code: - - # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the - # documentation. - h5_filename = os.path.join(tempdir.name, h5_filename) - .. 
jupyter-execute:: ds = xr.Dataset( @@ -884,10 +876,18 @@ To write a dataset with zarr, we use the :py:meth:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory: +.. jupyter-execute:: + + zarr_filename = "example.zarr" + .. jupyter-execute:: :hide-code: - ! rm -rf path/to/directory.zarr + import os.path + import tempfile + + tempdir = tempfile.TemporaryDirectory() + zarr_filename = os.path.join(tempdir.name, zarr_filename) .. jupyter-execute:: :stderr: @@ -900,7 +900,7 @@ To write to a local directory, we pass a path to a directory: "z": ("x", list("abcd")), }, ) - ds.to_zarr("path/to/directory.zarr", zarr_format=2, consolidated=False) + ds.to_zarr(zarr_filename, zarr_format=2, consolidated=False) (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -928,7 +928,7 @@ To read back a zarr dataset that has been created this way, we use the .. jupyter-execute:: - ds_zarr = xr.open_zarr("path/to/directory.zarr", consolidated=False) + ds_zarr = xr.open_zarr(zarr_filename, consolidated=False) ds_zarr Cloud Storage Buckets @@ -1032,7 +1032,7 @@ to Zarr: .. jupyter-execute:: :hide-code: - ! rm -rf path/to/directory.zarr + tempdir.cleanup() .. jupyter-execute:: @@ -1042,9 +1042,8 @@ to Zarr: # shape and chunks are used dummies = dask.array.zeros(30, chunks=10) ds = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)}) - path = "path/to/directory.zarr" # Now we write the metadata without computing any array values - ds.to_zarr(path, compute=False, consolidated=False) + ds.to_zarr(zarr_filename, compute=False, consolidated=False) Now, a Zarr store with the correct variable shapes and attributes exists that can be filled out by subsequent calls to ``to_zarr``. @@ -1059,9 +1058,9 @@ where the data should be written (in index space, not label space), e.g., # we would create them separately possibly even from separate processes. ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)}) # Any of the following region specifications are valid - ds.isel(x=slice(0, 10)).to_zarr(path, region="auto", consolidated=False) - ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"}, consolidated=False) - ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}, consolidated=False) + ds.isel(x=slice(0, 10)).to_zarr(zarr_filename, region="auto", consolidated=False) + ds.isel(x=slice(10, 20)).to_zarr(zarr_filename, region={"x": "auto"}, consolidated=False) + ds.isel(x=slice(20, 30)).to_zarr(zarr_filename, region={"x": slice(20, 30)}, consolidated=False) Concurrent writes with ``region`` are safe as long as they modify distinct chunks in the underlying Zarr arrays (or use an appropriate ``lock``). @@ -1134,7 +1133,7 @@ order, e.g., for time-stepping a simulation: .. jupyter-execute:: :hide-code: - ! rm -rf path/to/directory.zarr + tempdir.cleanup() .. jupyter-execute:: @@ -1146,7 +1145,7 @@ order, e.g., for time-stepping a simulation: "t": pd.date_range("2001-01-01", periods=2), }, ) - ds1.to_zarr("path/to/directory.zarr", consolidated=False) + ds1.to_zarr(zarr_filename, consolidated=False) .. jupyter-execute:: @@ -1158,7 +1157,7 @@ order, e.g., for time-stepping a simulation: "t": pd.date_range("2001-01-03", periods=2), }, ) - ds2.to_zarr("path/to/directory.zarr", append_dim="t", consolidated=False) + ds2.to_zarr(zarr_filename, append_dim="t", consolidated=False) .. _io.zarr.writing_chunks: @@ -1208,8 +1207,8 @@ split them into chunks: .. 
jupyter-execute:: - ds.to_zarr("path/to/directory.zarr", consolidated=False, mode="w") - !tree -I zarr.json path/to/directory.zarr + ds.to_zarr(zarr_filename, consolidated=False, mode="w") + !tree -I zarr.json $zarr_filename This may cause unwanted overhead on some systems, such as when reading from a cloud @@ -1219,12 +1218,12 @@ shape of each coordinate array in the ``encoding`` argument: .. jupyter-execute:: ds.to_zarr( - "path/to/directory.zarr", + zarr_filename, encoding={"xc": {"chunks": ds.xc.shape}, "yc": {"chunks": ds.yc.shape}}, consolidated=False, mode="w", ) - !tree -I zarr.json path/to/directory.zarr + !tree -I zarr.json $zarr_filename The number of chunks on Tair matches our dask chunks, while there is now only a single @@ -1682,10 +1681,7 @@ GDAL readable raster data using `rasterio`_ such as GeoTIFFs can be opened usin .. jupyter-execute:: :hide-code: - import shutil - - shutil.rmtree("foo.zarr") - shutil.rmtree("path/to/directory.zarr") + tempdir.cleanup() GRIB format via cfgrib ---------------------- From d7d81a18dc3bfd512955c451786f91d751ef027c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Dec 2025 16:59:33 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/getting-started-guide/quick-overview.rst | 2 +- doc/internals/time-coding.rst | 2 +- doc/user-guide/complex-numbers.rst | 8 ++++---- doc/user-guide/weather-climate.rst | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index 409a57e1c68..2a39278ddb5 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -219,7 +219,7 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D :hide-code: # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the + # so that it doesn't conflict with parallel builds of the # documentation. import tempfile diff --git a/doc/internals/time-coding.rst b/doc/internals/time-coding.rst index a510ccbfc17..91f07afe16a 100644 --- a/doc/internals/time-coding.rst +++ b/doc/internals/time-coding.rst @@ -467,7 +467,7 @@ The current default time unit of xarray is ``'ns'``. When setting keyword argume :hide-code: # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the + # so that it doesn't conflict with parallel builds of the # documentation. import tempfile diff --git a/doc/user-guide/complex-numbers.rst b/doc/user-guide/complex-numbers.rst index 208ac6bc4fc..216165f4dbe 100644 --- a/doc/user-guide/complex-numbers.rst +++ b/doc/user-guide/complex-numbers.rst @@ -15,7 +15,7 @@ Complex Numbers :hide-code: # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the + # so that it doesn't conflict with parallel builds of the # documentation. import tempfile @@ -70,7 +70,7 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py .. jupyter-execute:: :hide-code: - + complex_nums_h5_filename = os.path.join(tempdir.name, complex_nums_h5_filename) .. jupyter-execute:: @@ -93,7 +93,7 @@ Writing complex data to NetCDF files (see :ref:`io.netcdf`) is supported via :py .. 
jupyter-execute:: :hide-code: - + complex_nums_nc4_filename = os.path.join(tempdir.name, complex_nums_nc4_filename) .. jupyter-execute:: @@ -124,7 +124,7 @@ split the complex array into separate real and imaginary variables before saving .. jupyter-execute:: :hide-code: - + complex_manual_filename = os.path.join(tempdir.name, complex_manual_filename) .. jupyter-execute:: diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 6e3bf3af155..3b31fadaa70 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -264,7 +264,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: :hide-code: # Ensure the file is located in a unique temporary directory - # so that it doesn't conflict with parallel builds of the + # so that it doesn't conflict with parallel builds of the # documentation. import tempfile From 4ca26927810ade772cfe609efc3d85b166c4f4d1 Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Tue, 16 Dec 2025 11:15:05 +0000 Subject: [PATCH 5/8] More thread safety tweaks --- .gitignore | 2 ++ doc/user-guide/io.rst | 15 ++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index bb2b49c2cd4..ce6915a2bee 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,8 @@ doc/team-panel.txt doc/external-examples-gallery.txt doc/notebooks-examples-gallery.txt doc/videos-gallery.txt +doc/foo.zarr +doc/*.nc # Until we support this properly, excluding from gitignore. (adding it to # gitignore to make it _easier_ to work with `uv`, not as an indication that I diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 4076d0a9032..4b10281c8d7 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -811,10 +811,6 @@ with ``conda install h5netcdf``. Once installed we can use xarray to open HDF5 f The similarities between HDF5 and netCDF4 mean that HDF5 data can be written with the same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data: -.. jupyter-execute:: - - h5_filename = "saved_on_disk.h5" - .. jupyter-execute:: ds = xr.Dataset( @@ -826,7 +822,16 @@ same :py:meth:`Dataset.to_netcdf` method as used for netCDF4 data: }, ) - ds.to_netcdf(h5_filename) +.. jupyter-execute:: + :hide-code: + + # Check if the file exists and if not, create it + if not os.path.exists("saved_on_disk.h5"): + ds.to_netcdf("saved_on_disk.h5") + +.. code:: python + + ds.to_netcdf("saved_on_disk.h5") Groups ~~~~~~ From f0953cb251d23479b4c8b64eaf20524d83901bd8 Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Tue, 16 Dec 2025 11:15:40 +0000 Subject: [PATCH 6/8] Add temp doc files to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ce6915a2bee..c37948f226d 100644 --- a/.gitignore +++ b/.gitignore @@ -84,8 +84,9 @@ doc/team-panel.txt doc/external-examples-gallery.txt doc/notebooks-examples-gallery.txt doc/videos-gallery.txt -doc/foo.zarr +doc/*.zarr doc/*.nc +doc/*.h5 # Until we support this properly, excluding from gitignore. 
(adding it to # gitignore to make it _easier_ to work with `uv`, not as an indication that I From 80f4b3fe70202b4ab74bd42ee39192d770d75979 Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Tue, 16 Dec 2025 11:38:01 +0000 Subject: [PATCH 7/8] Rerun RtD build From 09036304980c158428fb9fc3f88aa6416e833cee Mon Sep 17 00:00:00 2001 From: Jacob Tomlinson Date: Tue, 16 Dec 2025 15:38:07 +0000 Subject: [PATCH 8/8] Write zarr file to temp dir --- doc/user-guide/io.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 4b10281c8d7..57be09deb0a 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -1085,10 +1085,17 @@ zarr ` These options can be passed to the ``to_zarr`` method as variable encoding. For example: +.. jupyter-execute:: + + zarr_filename = "foo.zarr" + .. jupyter-execute:: :hide-code: - ! rm -rf foo.zarr + import os.path + import tempfile + tempdir = tempfile.TemporaryDirectory() + zarr_filename = os.path.join(tempdir.name, zarr_filename) .. jupyter-execute:: @@ -1096,7 +1103,7 @@ For example: from zarr.codecs import BloscCodec compressor = BloscCodec(cname="zstd", clevel=3, shuffle="shuffle") - ds.to_zarr("foo.zarr", consolidated=False, encoding={"foo": {"compressors": [compressor]}}) + ds.to_zarr(zarr_filename, consolidated=False, encoding={"foo": {"compressors": [compressor]}}) .. note::