-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Support an `encoding` parameter when reading netCDF files with xarray
#10357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
Thanks for opening your first issue here at xarray! Be sure to follow the issue template!
You can change the following lines in the xarray source code:
class NetCDF4DataStore(WritableCFDataStore):
"""Store for reading and writing data via the Python-NetCDF4 library.
This store supports NetCDF3, NetCDF4 and OpenDAP datasets.
"""
__slots__ = (
"_filename",
"_group",
"_manager",
"_mode",
"autoclose",
"format",
"is_remote",
"lock",
)
def __init__(
- self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
+ self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False, encoding='utf8'
):
import netCDF4
if isinstance(manager, netCDF4.Dataset):
if group is None:
root, group = find_root_and_group(manager)
else:
if type(manager) is not netCDF4.Dataset:
raise ValueError(
"must supply a root netCDF4.Dataset if the group "
"argument is provided"
)
root = manager
manager = DummyFileManager(root)
self._manager = manager
self._group = group
self._mode = mode
self.format = self.ds.data_model
- self._filename = self.ds.filepath()
+ self._filename = self.ds.filepath(encoding=encoding)
self.is_remote = is_remote_uri(self._filename)
self.lock = ensure_lock(lock)
self.autoclose = autoclose
@classmethod
def open(
cls,
filename,
mode="r",
format="NETCDF4",
group=None,
clobber=True,
diskless=False,
persist=False,
auto_complex=None,
lock=None,
lock_maker=None,
autoclose=False,
+ encoding='utf8'
):
import netCDF4
if isinstance(filename, os.PathLike):
filename = os.fspath(filename)
if not isinstance(filename, str):
raise ValueError(
"can only read bytes or file-like objects "
"with engine='scipy' or 'h5netcdf'"
)
if format is None:
format = "NETCDF4"
if lock is None:
if mode == "r":
if is_remote_uri(filename):
lock = NETCDFC_LOCK
else:
lock = NETCDF4_PYTHON_LOCK
else:
if format is None or format.startswith("NETCDF4"):
base_lock = NETCDF4_PYTHON_LOCK
else:
base_lock = NETCDFC_LOCK
lock = combine_locks([base_lock, get_write_lock(filename)])
kwargs = dict(
clobber=clobber,
diskless=diskless,
persist=persist,
format=format,
+ encoding=encoding
)
if auto_complex is not None:
kwargs["auto_complex"] = auto_complex
manager = CachingFileManager(
netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
)
- return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
+ return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose, encoding=encoding)
and
class NetCDF4BackendEntrypoint(BackendEntrypoint):
"""
Backend for netCDF files based on the netCDF4 package.
It can open ".nc", ".nc4", ".cdf" files and will be chosen
as default for these files.
Additionally it can open valid HDF5 files, see
https://h5netcdf.org/#invalid-netcdf-files for more info.
It will not be detected as valid backend for such files, so make
sure to specify ``engine="netcdf4"`` in ``open_dataset``.
For more information about the underlying library, visit:
https://unidata.github.io/netcdf4-python
See Also
--------
backends.NetCDF4DataStore
backends.H5netcdfBackendEntrypoint
backends.ScipyBackendEntrypoint
"""
description = (
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray"
)
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html"
def guess_can_open(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
) -> bool:
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
return True
magic_number = try_read_magic_number_from_path(filename_or_obj)
if magic_number is not None:
# netcdf 3 or HDF5
return magic_number.startswith((b"CDF", b"\211HDF\r\n\032\n"))
if isinstance(filename_or_obj, str | os.PathLike):
_, ext = os.path.splitext(filename_or_obj)
return ext in {".nc", ".nc4", ".cdf"}
return False
def open_dataset(
self,
filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
*,
mask_and_scale=True,
decode_times=True,
concat_characters=True,
decode_coords=True,
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
group=None,
mode="r",
format="NETCDF4",
clobber=True,
diskless=False,
persist=False,
auto_complex=None,
lock=None,
autoclose=False,
+ encoding='utf8'
) -> Dataset:
filename_or_obj = _normalize_path(filename_or_obj)
store = NetCDF4DataStore.open(
filename_or_obj,
mode=mode,
format=format,
group=group,
clobber=clobber,
diskless=diskless,
persist=persist,
auto_complex=auto_complex,
lock=lock,
autoclose=autoclose,
+ encoding=encoding
)
store_entrypoint = StoreBackendEntrypoint()
with close_on_error(store):
ds = store_entrypoint.open_dataset(
store,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
concat_characters=concat_characters,
decode_coords=decode_coords,
drop_variables=drop_variables,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
)
return ds
Pay attention to all occurrences of `encoding`.
@wqshen We would happily take a PR to enable this. Are you able to contribute?
Is your feature request related to a problem?
When I use `netCDF4.Dataset` to open a file whose path contains Chinese characters, I get:
If I specify `encoding='gbk'`, it opens correctly:
However, xarray’s `open_dataset` does not support an `encoding` argument:
Related netCDF4 issue comment (from netCDF4-python#997)
Describe the solution you'd like
I would like xarray to accept an `encoding` parameter so that files with Chinese paths open successfully.
Describe alternatives you've considered
No response
Additional context
No response
The text was updated successfully, but these errors were encountered: