# Licensed under a 3-clause BSD style license - see PYFITS.rst
import gzip
import itertools
import os
import re
import shutil
import sys
import warnings
import numpy as np
from astropy.io.fits.file import FILE_MODES, _File
from astropy.io.fits.header import _pad_length
from astropy.io.fits.util import (
_free_space_check,
_get_array_mmap,
_is_int,
_tmp_name,
fileobj_closed,
fileobj_mode,
ignore_sigint,
isfile,
)
from astropy.io.fits.verify import VerifyError, VerifyWarning, _ErrList, _Verify
from astropy.utils import indent
from astropy.utils.compat.numpycompat import NUMPY_LT_2_0
# NOTE: Python can be built without bz2.
from astropy.utils.compat.optional_deps import HAS_BZ2
from astropy.utils.exceptions import AstropyUserWarning
from .base import ExtensionHDU, _BaseHDU, _NonstandardHDU, _ValidHDU
from .compressed.compressed import CompImageHDU
from .groups import GroupsHDU
from .image import ImageHDU, PrimaryHDU
from .table import BinTableHDU
if HAS_BZ2:
import bz2
__all__ = ["HDUList", "fitsopen"]
# FITS file signature as per RFC 4047
FITS_SIGNATURE = b"SIMPLE = T"
def fitsopen(
name,
mode="readonly",
memmap=None,
save_backup=False,
cache=True,
lazy_load_hdus=None,
ignore_missing_simple=False,
*,
use_fsspec=None,
fsspec_kwargs=None,
decompress_in_memory=False,
**kwargs,
):
"""Factory function to open a FITS file and return an `HDUList` object.
Parameters
----------
name : str, file-like or `pathlib.Path`
File to be opened.
mode : str, optional
Open mode, 'readonly', 'update', 'append', 'denywrite', or
'ostream'. Default is 'readonly'.
If ``name`` is a file object that is already opened, ``mode`` must
match the mode the file was opened with, readonly (rb), update (rb+),
append (ab+), ostream (w), denywrite (rb)).
memmap : bool, optional
Is memory mapping to be used? This value is obtained from the
configuration item ``astropy.io.fits.Conf.use_memmap``.
Default is `True`.
save_backup : bool, optional
If the file was opened in update or append mode, this ensures that
a backup of the original file is saved before any changes are flushed.
The backup has the same name as the original file with ".bak" appended.
If "file.bak" already exists then "file.bak.1" is used, and so on.
Default is `False`.
cache : bool, optional
If the file name is a URL, `~astropy.utils.data.download_file` is used
to open the file. This specifies whether or not to save the file
locally in Astropy's download cache. Default is `True`.
lazy_load_hdus : bool, optional
To avoid reading all the HDUs and headers in a FITS file immediately
upon opening. This is an optimization especially useful for large
files, as FITS has no way of determining the number and offsets of all
the HDUs in a file without scanning through the file and reading all
the headers. Default is `True`.
To disable lazy loading and read all HDUs immediately (the old
behavior) use ``lazy_load_hdus=False``. This can lead to fewer
surprises--for example with lazy loading enabled, ``len(hdul)``
can be slow, as it means the entire FITS file needs to be read in
order to determine the number of HDUs. ``lazy_load_hdus=False``
ensures that all HDUs have already been loaded after the file has
been opened.
.. versionadded:: 1.3
uint : bool, optional
Interpret signed integer data where ``BZERO`` is the central value and
``BSCALE == 1`` as unsigned integer data. For example, ``int16`` data
with ``BZERO = 32768`` and ``BSCALE = 1`` would be treated as
``uint16`` data. Default is `True` so that the pseudo-unsigned
integer convention is assumed.
ignore_missing_end : bool, optional
Do not raise an exception when opening a file that is missing an
``END`` card in the last header. Default is `False`.
ignore_missing_simple : bool, optional
Do not raise an exception when the SIMPLE keyword is missing. Note
that io.fits will raise a warning if a SIMPLE card is present but
written in a way that does not follow the FITS Standard.
Default is `False`.
.. versionadded:: 4.2
checksum : bool, str, optional
If `True`, verifies that both ``DATASUM`` and ``CHECKSUM`` card values
(when present in the HDU header) match the header and data of all HDU's
in the file. Updates to a file that already has a checksum will
preserve and update the existing checksums unless this argument is
given a value of 'remove', in which case the CHECKSUM and DATASUM
values are not checked, and are removed when saving changes to the
file. Default is `False`.
disable_image_compression : bool, optional
If `True`, treats compressed image HDU's like normal binary table
HDU's. Default is `False`.
do_not_scale_image_data : bool, optional
If `True`, image data is not scaled using BSCALE/BZERO values
when read. Default is `False`.
character_as_bytes : bool, optional
Whether to return bytes for string columns, otherwise unicode strings
are returned, but this does not respect memory mapping and loads the
whole column in memory when accessed. Default is `False`.
ignore_blank : bool, optional
If `True`, the BLANK keyword is ignored if present.
Default is `False`.
scale_back : bool, optional
If `True`, when saving changes to a file that contained scaled image
data, restore the data to the original type and reapply the original
BSCALE/BZERO values. This could lead to loss of accuracy if scaling
back to integer values after performing floating point operations on
the data. Default is `False`.
output_verify : str
Output verification option. Must be one of ``"fix"``,
``"silentfix"``, ``"ignore"``, ``"warn"``, or
``"exception"``. May also be any combination of ``"fix"`` or
``"silentfix"`` with ``"+ignore"``, ``+warn``, or ``+exception"
(e.g. ``"fix+warn"``). See :ref:`astropy:verify` for more info.
use_fsspec : bool, optional
Use `fsspec.open` to open the file? Defaults to `False` unless
``name`` starts with the Amazon S3 storage prefix ``s3://`` or the
Google Cloud Storage prefix ``gs://``. Can also be used for paths
with other prefixes (e.g., ``http://``) but in this case you must
explicitly pass ``use_fsspec=True``.
Use of this feature requires the optional ``fsspec`` package.
A ``ModuleNotFoundError`` will be raised if the dependency is missing.
.. versionadded:: 5.2
fsspec_kwargs : dict, optional
Keyword arguments passed on to `fsspec.open`. This can be used to
configure cloud storage credentials and caching behavior.
For example, pass ``fsspec_kwargs={"anon": True}`` to enable
anonymous access to Amazon S3 open data buckets.
See ``fsspec``'s documentation for available parameters.
.. versionadded:: 5.2
decompress_in_memory : bool, optional
By default files are decompressed progressively depending on what data
is needed. This is good for memory usage, avoiding decompression of
the whole file, but it can be slow. With decompress_in_memory=True it
is possible to decompress instead the whole file in memory.
.. versionadded:: 6.0
Returns
-------
hdulist : `HDUList`
`HDUList` containing all of the header data units in the file.
"""
from astropy.io.fits import conf
if memmap is None:
# distinguish between True (kwarg explicitly set)
# and None (preference for memmap in config, might be ignored)
memmap = None if conf.use_memmap else False
else:
memmap = bool(memmap)
if lazy_load_hdus is None:
lazy_load_hdus = conf.lazy_load_hdus
else:
lazy_load_hdus = bool(lazy_load_hdus)
if "uint" not in kwargs:
kwargs["uint"] = conf.enable_uint
if not name:
raise ValueError(f"Empty filename: {name!r}")
return HDUList.fromfile(
name,
mode,
memmap,
save_backup,
cache,
lazy_load_hdus,
ignore_missing_simple,
use_fsspec=use_fsspec,
fsspec_kwargs=fsspec_kwargs,
decompress_in_memory=decompress_in_memory,
**kwargs,
)
[docs]
class HDUList(list, _Verify):
"""
HDU list class. This is the top-level FITS object. When a FITS
file is opened, a `HDUList` object is returned.
"""
def __init__(self, hdus=[], file=None):
"""
Construct a `HDUList` object.
Parameters
----------
hdus : BaseHDU or sequence thereof, optional
The HDU object(s) to comprise the `HDUList`. Should be
instances of HDU classes like `ImageHDU` or `BinTableHDU`.
file : file-like, bytes, optional
The opened physical file associated with the `HDUList`
or a bytes object containing the contents of the FITS
file.
"""
if isinstance(file, bytes):
self._data = file
self._file = None
else:
self._file = file
self._data = None
# For internal use only--the keyword args passed to fitsopen /
# HDUList.fromfile/string when opening the file
self._open_kwargs = {}
self._in_read_next_hdu = False
# If we have read all the HDUs from the file or not
# The assumes that all HDUs have been written when we first opened the
# file; we do not currently support loading additional HDUs from a file
# while it is being streamed to. In the future that might be supported
# but for now this is only used for the purpose of lazy-loading of
# existing HDUs.
if file is None:
self._read_all = True
elif self._file is not None:
# Should never attempt to read HDUs in ostream mode
self._read_all = self._file.mode == "ostream"
else:
self._read_all = False
if hdus is None:
hdus = []
# can take one HDU, as well as a list of HDU's as input
if isinstance(hdus, _ValidHDU):
hdus = [hdus]
elif not isinstance(hdus, (HDUList, list)):
raise TypeError("Invalid input for HDUList.")
for idx, hdu in enumerate(hdus):
if not isinstance(hdu, _BaseHDU):
raise TypeError(f"Element {idx} in the HDUList input is not an HDU.")
super().__init__(hdus)
if file is None:
# Only do this when initializing from an existing list of HDUs
# When initializing from a file, this will be handled by the
# append method after the first HDU is read
self.update_extend()
def __len__(self):
if not self._in_read_next_hdu:
self.readall()
return super().__len__()
def __repr__(self):
# Special case: if the FITS file is located on a remote file system
# and has not been fully read yet, we return a simplified repr to
# avoid downloading the entire file. We can tell that a file is remote
# from the fact that the ``fsspec`` package was used to open it.
is_fsspec_file = self._file and "fsspec" in str(
self._file._file.__class__.__bases__
)
if not self._read_all and is_fsspec_file:
return f"{type(self)} (partially read)"
# In order to correctly repr an HDUList we need to load all the
# HDUs as well
self.readall()
return super().__repr__()
def __iter__(self):
# While effectively this does the same as:
# for idx in range(len(self)):
# yield self[idx]
# the more complicated structure is here to prevent the use of len(),
# which would break the lazy loading
for idx in itertools.count():
try:
yield self[idx]
except IndexError:
break
def __getitem__(self, key):
"""
Get an HDU from the `HDUList`, indexed by number or name.
"""
# If the key is a slice we need to make sure the necessary HDUs
# have been loaded before passing the slice on to super.
if isinstance(key, slice):
max_idx = key.stop
# Check for and handle the case when no maximum was
# specified (e.g. [1:]).
if max_idx is None:
# We need all of the HDUs, so load them
# and reset the maximum to the actual length.
max_idx = len(self)
# Just in case the max_idx is negative...
max_idx = self._positive_index_of(max_idx)
number_loaded = super().__len__()
if max_idx >= number_loaded:
# We need more than we have, try loading up to and including
# max_idx. Note we do not try to be clever about skipping HDUs
# even though key.step might conceivably allow it.
for i in range(number_loaded, max_idx):
# Read until max_idx or to the end of the file, whichever
# comes first.
if not self._read_next_hdu():
break
try:
hdus = super().__getitem__(key)
except IndexError as e:
# Raise a more helpful IndexError if the file was not fully read.
if self._read_all:
raise e
else:
raise IndexError(
"HDU not found, possibly because the index "
"is out of range, or because the file was "
"closed before all HDUs were read"
)
else:
return HDUList(hdus)
# Originally this used recursion, but hypothetically an HDU with
# a very large number of HDUs could blow the stack, so use a loop
# instead
try:
return self._try_while_unread_hdus(
super().__getitem__, self._positive_index_of(key)
)
except IndexError as e:
# Raise a more helpful IndexError if the file was not fully read.
if self._read_all:
raise e
else:
raise IndexError(
"HDU not found, possibly because the index "
"is out of range, or because the file was "
"closed before all HDUs were read"
)
def __contains__(self, item):
"""
Returns `True` if ``item`` is an ``HDU`` _in_ ``self`` or a valid
extension specification (e.g., integer extension number, extension
name, or a tuple of extension name and an extension version)
of a ``HDU`` in ``self``.
"""
try:
self._try_while_unread_hdus(self.index_of, item)
except (KeyError, ValueError):
return False
return True
def __setitem__(self, key, hdu):
"""
Set an HDU to the `HDUList`, indexed by number or name.
"""
_key = self._positive_index_of(key)
if isinstance(hdu, (slice, list)):
if _is_int(_key):
raise ValueError("An element in the HDUList must be an HDU.")
for item in hdu:
if not isinstance(item, _BaseHDU):
raise ValueError(f"{item} is not an HDU.")
else:
if not isinstance(hdu, _BaseHDU):
raise ValueError(f"{hdu} is not an HDU.")
try:
self._try_while_unread_hdus(super().__setitem__, _key, hdu)
except IndexError:
raise IndexError(f"Extension {key} is out of bound or not found.")
self._resize = True
self._truncate = False
def __delitem__(self, key):
"""
Delete an HDU from the `HDUList`, indexed by number or name.
"""
if isinstance(key, slice):
end_index = len(self)
else:
key = self._positive_index_of(key)
end_index = len(self) - 1
self._try_while_unread_hdus(super().__delitem__, key)
if key == end_index or key == -1 and not self._resize:
self._truncate = True
else:
self._truncate = False
self._resize = True
# Support the 'with' statement
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
output_verify = self._open_kwargs.get("output_verify", "exception")
self.close(output_verify=output_verify)
[docs]
@classmethod
def fromfile(
cls,
fileobj,
mode=None,
memmap=None,
save_backup=False,
cache=True,
lazy_load_hdus=True,
ignore_missing_simple=False,
**kwargs,
):
"""
Creates an `HDUList` instance from a file-like object.
The actual implementation of ``fitsopen()``, and generally shouldn't
be used directly. Use :func:`open` instead (and see its
documentation for details of the parameters accepted by this method).
"""
return cls._readfrom(
fileobj=fileobj,
mode=mode,
memmap=memmap,
save_backup=save_backup,
cache=cache,
ignore_missing_simple=ignore_missing_simple,
lazy_load_hdus=lazy_load_hdus,
**kwargs,
)
[docs]
@classmethod
def fromstring(cls, data, **kwargs):
"""
Creates an `HDUList` instance from a string or other in-memory data
buffer containing an entire FITS file. Similar to
:meth:`HDUList.fromfile`, but does not accept the mode or memmap
arguments, as they are only relevant to reading from a file on disk.
This is useful for interfacing with other libraries such as CFITSIO,
and may also be useful for streaming applications.
Parameters
----------
data : str, buffer-like, etc.
A string or other memory buffer containing an entire FITS file.
Buffer-like objects include :class:`~bytes`, :class:`~bytearray`,
:class:`~memoryview`, and :class:`~numpy.ndarray`.
It should be noted that if that memory is read-only (such as a
Python string) the returned :class:`HDUList`'s data portions will
also be read-only.
**kwargs : dict
Optional keyword arguments. See
:func:`astropy.io.fits.open` for details.
Returns
-------
hdul : HDUList
An :class:`HDUList` object representing the in-memory FITS file.
"""
try:
# Test that the given object supports the buffer interface by
# ensuring an ndarray can be created from it
np.ndarray((), dtype="ubyte", buffer=data)
except TypeError:
raise TypeError(
f"The provided object {data} does not contain an underlying "
"memory buffer. fromstring() requires an object that "
"supports the buffer interface such as bytes, buffer, "
"memoryview, ndarray, etc. This restriction is to ensure "
"that efficient access to the array/table data is possible."
)
return cls._readfrom(data=data, **kwargs)
[docs]
def fileinfo(self, index):
"""
Returns a dictionary detailing information about the locations
of the indexed HDU within any associated file. The values are
only valid after a read or write of the associated file with
no intervening changes to the `HDUList`.
Parameters
----------
index : int
Index of HDU for which info is to be returned.
Returns
-------
fileinfo : dict or None
The dictionary details information about the locations of
the indexed HDU within an associated file. Returns `None`
when the HDU is not associated with a file.
Dictionary contents:
========== ========================================================
Key Value
========== ========================================================
file File object associated with the HDU
filename Name of associated file object
filemode Mode in which the file was opened (readonly,
update, append, denywrite, ostream)
resized Flag that when `True` indicates that the data has been
resized since the last read/write so the returned values
may not be valid.
hdrLoc Starting byte location of header in file
datLoc Starting byte location of data block in file
datSpan Data size including padding
========== ========================================================
"""
if self._file is not None:
output = self[index].fileinfo()
if not output:
# OK, the HDU associated with this index is not yet
# tied to the file associated with the HDUList. The only way
# to get the file object is to check each of the HDU's in the
# list until we find the one associated with the file.
f = None
for hdu in self:
info = hdu.fileinfo()
if info:
f = info["file"]
fm = info["filemode"]
break
output = {
"file": f,
"filemode": fm,
"hdrLoc": None,
"datLoc": None,
"datSpan": None,
}
output["filename"] = self._file.name
output["resized"] = self._wasresized()
else:
output = None
return output
def __copy__(self):
"""
Return a shallow copy of an HDUList.
Returns
-------
copy : `HDUList`
A shallow copy of this `HDUList` object.
"""
return self[:]
# Syntactic sugar for `__copy__()` magic method
copy = __copy__
def __deepcopy__(self, memo=None):
return HDUList([hdu.copy() for hdu in self])
[docs]
def pop(self, index=-1):
"""Remove an item from the list and return it.
Parameters
----------
index : int, str, tuple of (string, int), optional
An integer value of ``index`` indicates the position from which
``pop()`` removes and returns an HDU. A string value or a tuple
of ``(string, int)`` functions as a key for identifying the
HDU to be removed and returned. If ``key`` is a tuple, it is
of the form ``(key, ver)`` where ``ver`` is an ``EXTVER``
value that must match the HDU being searched for.
If the key is ambiguous (e.g. there are multiple 'SCI' extensions)
the first match is returned. For a more precise match use the
``(name, ver)`` pair.
If even the ``(name, ver)`` pair is ambiguous the numeric index
must be used to index the duplicate HDU.
Returns
-------
hdu : BaseHDU
The HDU object at position indicated by ``index`` or having name
and version specified by ``index``.
"""
# Make sure that HDUs are loaded before attempting to pop
self.readall()
list_index = self.index_of(index)
return super().pop(list_index)
[docs]
def insert(self, index, hdu):
"""
Insert an HDU into the `HDUList` at the given ``index``.
Parameters
----------
index : int
Index before which to insert the new HDU.
hdu : BaseHDU
The HDU object to insert
"""
if not isinstance(hdu, _BaseHDU):
raise ValueError(f"{hdu} is not an HDU.")
num_hdus = len(self)
if index == 0 or num_hdus == 0:
if num_hdus != 0:
# We are inserting a new Primary HDU so we need to
# make the current Primary HDU into an extension HDU.
if isinstance(self[0], GroupsHDU):
raise ValueError(
"The current Primary HDU is a GroupsHDU. "
"It can't be made into an extension HDU, "
"so another HDU cannot be inserted before it."
)
hdu1 = ImageHDU(self[0].data, self[0].header)
# Insert it into position 1, then delete HDU at position 0.
super().insert(1, hdu1)
super().__delitem__(0)
if not isinstance(hdu, (PrimaryHDU, _NonstandardHDU)):
# You passed in an Extension HDU but we need a Primary HDU.
# If you provided an ImageHDU then we can convert it to
# a primary HDU and use that.
if isinstance(hdu, ImageHDU):
hdu = PrimaryHDU(hdu.data, hdu.header)
else:
# You didn't provide an ImageHDU so we create a
# simple Primary HDU and append that first before
# we append the new Extension HDU.
phdu = PrimaryHDU()
super().insert(0, phdu)
index = 1
else:
if isinstance(hdu, GroupsHDU):
raise ValueError("A GroupsHDU must be inserted as a Primary HDU.")
if isinstance(hdu, PrimaryHDU):
# You passed a Primary HDU but we need an Extension HDU
# so create an Extension HDU from the input Primary HDU.
hdu = ImageHDU(hdu.data, hdu.header)
super().insert(index, hdu)
hdu._new = True
self._resize = True
self._truncate = False
# make sure the EXTEND keyword is in primary HDU if there is extension
self.update_extend()
[docs]
def append(self, hdu):
"""
Append a new HDU to the `HDUList`.
Parameters
----------
hdu : BaseHDU
HDU to add to the `HDUList`.
"""
if not isinstance(hdu, _BaseHDU):
raise ValueError("HDUList can only append an HDU.")
if len(self) > 0:
if isinstance(hdu, GroupsHDU):
raise ValueError("Can't append a GroupsHDU to a non-empty HDUList")
if isinstance(hdu, PrimaryHDU):
# You passed a Primary HDU but we need an Extension HDU
# so create an Extension HDU from the input Primary HDU.
# TODO: This isn't necessarily sufficient to copy the HDU;
# _header_offset and friends need to be copied too.
hdu = ImageHDU(hdu.data, hdu.header)
else:
if not isinstance(hdu, (PrimaryHDU, _NonstandardHDU)):
# You passed in an Extension HDU but we need a Primary
# HDU.
# If you provided an ImageHDU then we can convert it to
# a primary HDU and use that.
if isinstance(hdu, ImageHDU):
hdu = PrimaryHDU(hdu.data, hdu.header)
else:
# You didn't provide an ImageHDU so we create a
# simple Primary HDU and append that first before
# we append the new Extension HDU.
phdu = PrimaryHDU()
super().append(phdu)
super().append(hdu)
hdu._new = True
self._resize = True
self._truncate = False
# make sure the EXTEND keyword is in primary HDU if there is extension
self.update_extend()
[docs]
def index_of(self, key):
"""
Get the index of an HDU from the `HDUList`.
Parameters
----------
key : int, str, tuple of (string, int) or BaseHDU
The key identifying the HDU. If ``key`` is a tuple, it is of the
form ``(name, ver)`` where ``ver`` is an ``EXTVER`` value that must
match the HDU being searched for.
If the key is ambiguous (e.g. there are multiple 'SCI' extensions)
the first match is returned. For a more precise match use the
``(name, ver)`` pair.
If even the ``(name, ver)`` pair is ambiguous (it shouldn't be
but it's not impossible) the numeric index must be used to index
the duplicate HDU.
When ``key`` is an HDU object, this function returns the
index of that HDU object in the ``HDUList``.
Returns
-------
index : int
The index of the HDU in the `HDUList`.
Raises
------
ValueError
If ``key`` is an HDU object and it is not found in the ``HDUList``.
KeyError
If an HDU specified by the ``key`` that is an extension number,
extension name, or a tuple of extension name and version is not
found in the ``HDUList``.
"""
if _is_int(key):
return key
elif isinstance(key, tuple):
_key, _ver = key
elif isinstance(key, _BaseHDU):
return self.index(key)
else:
_key = key
_ver = None
if not isinstance(_key, str):
raise KeyError(
f"{type(self).__name__} indices must be integers, extension "
f"names as strings, or (extname, version) tuples; got {_key}"
)
_key = (_key.strip()).upper()
found = None
for idx, hdu in enumerate(self):
name = hdu.name
if isinstance(name, str):
name = name.strip().upper()
# 'PRIMARY' should always work as a reference to the first HDU
if (name == _key or (_key == "PRIMARY" and idx == 0)) and (
_ver is None or _ver == hdu.ver
):
found = idx
break
if found is None:
raise KeyError(f"Extension {key!r} not found.")
else:
return found
def _positive_index_of(self, key):
"""
Same as index_of, but ensures always returning a positive index
or zero.
(Really this should be called non_negative_index_of but it felt
too long.)
This means that if the key is a negative integer, we have to
convert it to the corresponding positive index. This means
knowing the length of the HDUList, which in turn means loading
all HDUs. Therefore using negative indices on HDULists is inherently
inefficient.
"""
index = self.index_of(key)
if index >= 0:
return index
if abs(index) > len(self):
raise IndexError(f"Extension {index} is out of bound or not found.")
return len(self) + index
[docs]
def readall(self):
"""
Read data of all HDUs into memory.
"""
while self._read_next_hdu():
pass
[docs]
@ignore_sigint
def flush(self, output_verify="fix", verbose=False):
"""
Force a write of the `HDUList` back to the file (for append and
update modes only).
Parameters
----------
output_verify : str
Output verification option. Must be one of ``"fix"``,
``"silentfix"``, ``"ignore"``, ``"warn"``, or
``"exception"``. May also be any combination of ``"fix"`` or
``"silentfix"`` with ``"+ignore"``, ``+warn``, or ``+exception"
(e.g. ``"fix+warn"``). See :ref:`astropy:verify` for more info.
verbose : bool
When `True`, print verbose messages
"""
if self._file.mode not in ("append", "update", "ostream"):
warnings.warn(
f"Flush for '{self._file.mode}' mode is not supported.",
AstropyUserWarning,
)
return
save_backup = self._open_kwargs.get("save_backup", False)
if save_backup and self._file.mode in ("append", "update"):
filename = self._file.name
if os.path.exists(filename):
# The file doesn't actually exist anymore for some reason
# then there's no point in trying to make a backup
backup = filename + ".bak"
idx = 1
while os.path.exists(backup):
backup = filename + ".bak." + str(idx)
idx += 1
warnings.warn(
f"Saving a backup of {filename} to {backup}.", AstropyUserWarning
)
try:
shutil.copy(filename, backup)
except OSError as exc:
raise OSError(
f"Failed to save backup to destination {filename}"
) from exc
self.verify(option=output_verify)
if self._file.mode in ("append", "ostream"):
for hdu in self:
if verbose:
try:
extver = str(hdu._header["extver"])
except KeyError:
extver = ""
# only append HDU's which are "new"
if hdu._new:
hdu._prewriteto(checksum=hdu._output_checksum)
with _free_space_check(self):
hdu._writeto(self._file)
if verbose:
print("append HDU", hdu.name, extver)
hdu._new = False
hdu._postwriteto()
elif self._file.mode == "update":
self._flush_update()
[docs]
def update_extend(self):
"""
Make sure that if the primary header needs the keyword ``EXTEND`` that
it has it and it is correct.
"""
if not len(self):
return
if not isinstance(self[0], PrimaryHDU):
# A PrimaryHDU will be automatically inserted at some point, but it
# might not have been added yet
return
hdr = self[0].header
def get_first_ext():
try:
return self[1]
except IndexError:
return None
if "EXTEND" in hdr:
if not hdr["EXTEND"] and get_first_ext() is not None:
hdr["EXTEND"] = True
elif get_first_ext() is not None:
if hdr["NAXIS"] == 0:
hdr.set("EXTEND", True, after="NAXIS")
else:
n = hdr["NAXIS"]
hdr.set("EXTEND", True, after="NAXIS" + str(n))
[docs]
def writeto(
self, fileobj, output_verify="exception", overwrite=False, checksum=False
):
"""
Write the `HDUList` to a new file.
Parameters
----------
fileobj : str, file-like or `pathlib.Path`
File to write to. If a file object, must be opened in a
writeable mode.
output_verify : str
Output verification option. Must be one of ``"fix"``,
``"silentfix"``, ``"ignore"``, ``"warn"``, or
``"exception"``. May also be any combination of ``"fix"`` or
``"silentfix"`` with ``"+ignore"``, ``+warn``, or ``+exception"
(e.g. ``"fix+warn"``). See :ref:`astropy:verify` for more info.
overwrite : bool, optional
If ``True``, overwrite the output file if it exists. Raises an
``OSError`` if ``False`` and the output file exists. Default is
``False``.
checksum : bool
When `True` adds both ``DATASUM`` and ``CHECKSUM`` cards
to the headers of all HDU's written to the file.
Notes
-----
gzip, zip and bzip2 compression algorithms are natively supported.
Compression mode is determined from the filename extension
('.gz', '.zip' or '.bz2' respectively). It is also possible to pass a
compressed file object, e.g. `gzip.GzipFile`.
"""
if len(self) == 0:
warnings.warn("There is nothing to write.", AstropyUserWarning)
return
self.verify(option=output_verify)
# make sure the EXTEND keyword is there if there is extension
self.update_extend()
if fileobj is sys.stdout:
# special case stdout for debugging convenience
# see https://github.com/astropy/astropy/issues/3427
fileobj = fileobj.buffer
# make note of whether the input file object is already open, in which
# case we should not close it after writing (that should be the job
# of the caller)
closed = isinstance(fileobj, str) or fileobj_closed(fileobj)
mode = FILE_MODES[fileobj_mode(fileobj)] if isfile(fileobj) else "ostream"
# This can accept an open file object that's open to write only, or in
# append/update modes but only if the file doesn't exist.
fileobj = _File(fileobj, mode=mode, overwrite=overwrite)
hdulist = self.fromfile(fileobj)
try:
dirname = os.path.dirname(hdulist._file.name)
except (AttributeError, TypeError):
dirname = None
try:
with _free_space_check(self, dirname=dirname):
for hdu in self:
hdu._prewriteto(checksum=checksum)
hdu._writeto(hdulist._file)
hdu._postwriteto()
finally:
hdulist.close(output_verify=output_verify, closed=closed)
[docs]
def close(self, output_verify="exception", verbose=False, closed=True):
"""
Close the associated FITS file and memmap object, if any.
Parameters
----------
output_verify : str
Output verification option. Must be one of ``"fix"``,
``"silentfix"``, ``"ignore"``, ``"warn"``, or
``"exception"``. May also be any combination of ``"fix"`` or
``"silentfix"`` with ``"+ignore"``, ``+warn``, or ``+exception"
(e.g. ``"fix+warn"``). See :ref:`astropy:verify` for more info.
verbose : bool
When `True`, print out verbose messages.
closed : bool
When `True`, close the underlying file object.
"""
try:
if (
self._file
and self._file.mode in ("append", "update")
and not self._file.closed
):
self.flush(output_verify=output_verify, verbose=verbose)
finally:
if self._file and closed and hasattr(self._file, "close"):
self._file.close()
# Give individual HDUs an opportunity to do on-close cleanup
for hdu in self:
hdu._close(closed=closed)
[docs]
def info(self, output=None):
"""
Summarize the info of the HDUs in this `HDUList`.
Note that this function prints its results to the console---it
does not return a value.
Parameters
----------
output : file-like or bool, optional
A file-like object to write the output to. If `False`, does not
output to a file and instead returns a list of tuples representing
the HDU info. Writes to ``sys.stdout`` by default.
"""
if output is None:
output = sys.stdout
if self._file is None:
name = "(No file associated with this HDUList)"
else:
name = self._file.name
results = [
f"Filename: {name}",
"No. Name Ver Type Cards Dimensions Format",
]
format = "{:3d} {:10} {:3} {:11} {:5d} {} {} {}"
default = ("", "", "", 0, (), "", "")
for idx, hdu in enumerate(self):
summary = hdu._summary()
if len(summary) < len(default):
summary += default[len(summary) :]
summary = (idx,) + summary
if output:
results.append(format.format(*summary))
else:
results.append(summary)
if output:
output.write("\n".join(results))
output.write("\n")
output.flush()
else:
return results[2:]
[docs]
def filename(self):
"""
Return the file name associated with the HDUList object if one exists.
Otherwise returns None.
Returns
-------
filename : str
A string containing the file name associated with the HDUList
object if an association exists. Otherwise returns None.
"""
if self._file is not None:
if hasattr(self._file, "name"):
return self._file.name
return None
@classmethod
def _readfrom(
cls,
fileobj=None,
data=None,
mode=None,
memmap=None,
cache=True,
lazy_load_hdus=True,
ignore_missing_simple=False,
*,
use_fsspec=None,
fsspec_kwargs=None,
decompress_in_memory=False,
**kwargs,
):
"""
Provides the implementations from HDUList.fromfile and
HDUList.fromstring, both of which wrap this method, as their
implementations are largely the same.
"""
if fileobj is not None:
if not isinstance(fileobj, _File):
# instantiate a FITS file object (ffo)
fileobj = _File(
fileobj,
mode=mode,
memmap=memmap,
cache=cache,
use_fsspec=use_fsspec,
fsspec_kwargs=fsspec_kwargs,
decompress_in_memory=decompress_in_memory,
)
# The Astropy mode is determined by the _File initializer if the
# supplied mode was None
mode = fileobj.mode
hdulist = cls(file=fileobj)
else:
if mode is None:
# The default mode
mode = "readonly"
hdulist = cls(file=data)
# This method is currently only called from HDUList.fromstring and
# HDUList.fromfile. If fileobj is None then this must be the
# fromstring case; the data type of ``data`` will be checked in the
# _BaseHDU.fromstring call.
if (
not ignore_missing_simple
and hdulist._file
and hdulist._file.mode != "ostream"
and hdulist._file.size > 0
):
pos = hdulist._file.tell()
# FITS signature is supposed to be in the first 30 bytes, but to
# allow reading various invalid files we will check in the first
# card (80 bytes).
simple = hdulist._file.read(80)
match_sig = simple[:29] == FITS_SIGNATURE[:-1] and simple[29:30] in (
b"T",
b"F",
)
if not match_sig:
# Check the SIMPLE card is there but not written correctly
match_sig_relaxed = re.match(rb"SIMPLE\s*=\s*[T|F]", simple)
if match_sig_relaxed:
warnings.warn(
"Found a SIMPLE card but its format doesn't"
" respect the FITS Standard",
VerifyWarning,
)
else:
if hdulist._file.close_on_error:
hdulist._file.close()
raise OSError(
"No SIMPLE card found, this file does not appear to "
"be a valid FITS file. If this is really a FITS file, "
"try with ignore_missing_simple=True"
)
hdulist._file.seek(pos)
# Store additional keyword args that were passed to fits.open
hdulist._open_kwargs = kwargs
if fileobj is not None and fileobj.writeonly:
# Output stream--not interested in reading/parsing
# the HDUs--just writing to the output file
return hdulist
# Make sure at least the PRIMARY HDU can be read
read_one = hdulist._read_next_hdu()
# If we're trying to read only and no header units were found,
# raise an exception
if not read_one and mode in ("readonly", "denywrite"):
# Close the file if necessary (issue #6168)
if hdulist._file.close_on_error:
hdulist._file.close()
raise OSError("Empty or corrupt FITS file")
if not lazy_load_hdus or kwargs.get("checksum") is True:
# Go ahead and load all HDUs
while hdulist._read_next_hdu():
pass
# initialize/reset attributes to be used in "update/append" mode
hdulist._resize = False
hdulist._truncate = False
return hdulist
def _try_while_unread_hdus(self, func, *args, **kwargs):
"""
Attempt an operation that accesses an HDU by index/name
that can fail if not all HDUs have been read yet. Keep
reading HDUs until the operation succeeds or there are no
more HDUs to read.
"""
while True:
try:
return func(*args, **kwargs)
except Exception:
if self._read_next_hdu():
continue
else:
raise
def _read_next_hdu(self):
"""
Lazily load a single HDU from the fileobj or data string the `HDUList`
was opened from, unless no further HDUs are found.
Returns True if a new HDU was loaded, or False otherwise.
"""
if self._read_all:
return False
fileobj, data, kwargs = self._file, self._data, self._open_kwargs
if fileobj is not None and fileobj.closed:
return False
try:
self._in_read_next_hdu = True
# read all HDUs
try:
if fileobj is not None:
try:
# Make sure we're back to the end of the last read
# HDU
if len(self) > 0:
last = self[len(self) - 1]
if last._data_offset is not None:
offset = last._data_offset + last._data_size
fileobj.seek(offset, os.SEEK_SET)
hdu = _BaseHDU.readfrom(fileobj, **kwargs)
except EOFError:
self._read_all = True
return False
except OSError:
# Close the file: see
# https://github.com/astropy/astropy/issues/6168
#
if self._file.close_on_error:
self._file.close()
if fileobj.writeonly:
self._read_all = True
return False
else:
raise
else:
if not data:
self._read_all = True
return False
hdu = _BaseHDU.fromstring(data, **kwargs)
self._data = data[hdu._data_offset + hdu._data_size :]
if not kwargs.get("disable_image_compression", False):
if isinstance(hdu, BinTableHDU) and CompImageHDU.match_header(
hdu.header
):
kwargs_comp = {
key: val
for key, val in kwargs.items()
if key in ("scale_back", "uint", "do_not_scale_image_data")
}
hdu = CompImageHDU(bintable=hdu, **kwargs_comp)
super().append(hdu)
if len(self) == 1:
# Check for an extension HDU and update the EXTEND
# keyword of the primary HDU accordingly
self.update_extend()
hdu._new = False
if "checksum" in kwargs:
hdu._output_checksum = kwargs["checksum"]
# check in the case there is extra space after the last HDU or
# corrupted HDU
except (VerifyError, ValueError) as exc:
warnings.warn(
f"Error validating header for HDU #{len(self)} (note: Astropy "
f"uses zero-based indexing).\n{indent(str(exc))}\n"
"There may be extra bytes after the last HDU or the "
"file is corrupted.",
VerifyWarning,
)
del exc
self._read_all = True
return False
finally:
self._in_read_next_hdu = False
return True
def _verify(self, option="warn"):
errs = _ErrList([], unit="HDU")
# the first (0th) element must be a primary HDU
if (
len(self) > 0
and (not isinstance(self[0], PrimaryHDU))
and (not isinstance(self[0], _NonstandardHDU))
):
err_text = "HDUList's 0th element is not a primary HDU."
fix_text = "Fixed by inserting one as 0th HDU."
def fix(self=self):
self.insert(0, PrimaryHDU())
err = self.run_option(option, err_text=err_text, fix_text=fix_text, fix=fix)
errs.append(err)
if len(self) > 1 and (
"EXTEND" not in self[0].header or self[0].header["EXTEND"] is not True
):
err_text = (
"Primary HDU does not contain an EXTEND keyword "
"equal to T even though there are extension HDUs."
)
fix_text = "Fixed by inserting or updating the EXTEND keyword."
def fix(header=self[0].header):
naxis = header["NAXIS"]
if naxis == 0:
after = "NAXIS"
else:
after = "NAXIS" + str(naxis)
header.set("EXTEND", value=True, after=after)
errs.append(
self.run_option(option, err_text=err_text, fix_text=fix_text, fix=fix)
)
# each element calls their own verify
for idx, hdu in enumerate(self):
if idx > 0 and (not isinstance(hdu, ExtensionHDU)):
err_text = f"HDUList's element {idx} is not an extension HDU."
err = self.run_option(option, err_text=err_text, fixable=False)
errs.append(err)
else:
result = hdu._verify(option)
if result:
errs.append(result)
return errs
def _flush_update(self):
"""Implements flushing changes to a file in update mode."""
for hdu in self:
# Need to all _prewriteto() for each HDU first to determine if
# resizing will be necessary
hdu._prewriteto(checksum=hdu._output_checksum, inplace=True)
try:
self._wasresized()
# if the HDUList is resized, need to write out the entire contents of
# the hdulist to the file.
if self._resize or self._file.compression:
self._flush_resize()
else:
# if not resized, update in place
for hdu in self:
hdu._writeto(self._file, inplace=True)
# reset the modification attributes after updating
for hdu in self:
hdu._header._modified = False
finally:
for hdu in self:
hdu._postwriteto()
def _flush_resize(self):
"""
Implements flushing changes in update mode when parts of one or more HDU
need to be resized.
"""
old_name = self._file.name
old_memmap = self._file.memmap
name = _tmp_name(old_name)
if not self._file.file_like:
old_mode = os.stat(old_name).st_mode
# The underlying file is an actual file object. The HDUList is
# resized, so we need to write it to a tmp file, delete the
# original file, and rename the tmp file to the original file.
if self._file.compression == "gzip":
new_file = gzip.GzipFile(name, mode="ab+")
elif self._file.compression == "bzip2":
if not HAS_BZ2:
raise ModuleNotFoundError(
"This Python installation does not provide the bz2 module."
)
new_file = bz2.BZ2File(name, mode="w")
else:
new_file = name
with self.fromfile(new_file, mode="append") as hdulist:
for hdu in self:
hdu._writeto(hdulist._file, inplace=True, copy=True)
if sys.platform.startswith("win"):
# Collect a list of open mmaps to the data; this well be
# used later. See below.
mmaps = [
(idx, _get_array_mmap(hdu.data), hdu.data)
for idx, hdu in enumerate(self)
if hdu._has_data
]
hdulist._file.close()
self._file.close()
if sys.platform.startswith("win"):
# Close all open mmaps to the data. This is only necessary on
# Windows, which will not allow a file to be renamed or deleted
# until all handles to that file have been closed.
for idx, mmap, arr in mmaps:
if mmap is not None:
mmap.close()
os.remove(self._file.name)
# reopen the renamed new file with "update" mode
os.rename(name, old_name)
os.chmod(old_name, old_mode)
if isinstance(new_file, gzip.GzipFile):
old_file = gzip.GzipFile(old_name, mode="rb+")
else:
old_file = old_name
ffo = _File(old_file, mode="update", memmap=old_memmap)
self._file = ffo
for hdu in self:
# Need to update the _file attribute and close any open mmaps
# on each HDU
if hdu._has_data and _get_array_mmap(hdu.data) is not None:
del hdu.data
hdu._file = ffo
if sys.platform.startswith("win"):
# On Windows, all the original data mmaps were closed above.
# However, it's possible that the user still has references to
# the old data which would no longer work (possibly even cause
# a segfault if they try to access it). This replaces the
# buffers used by the original arrays with the buffers of mmap
# arrays created from the new file. This seems to work, but
# it's a flaming hack and carries no guarantees that it won't
# lead to odd behavior in practice. Better to just not keep
# references to data from files that had to be resized upon
# flushing (on Windows--again, this is no problem on Linux).
for idx, mmap, arr in mmaps:
if mmap is None:
continue
if NUMPY_LT_2_0:
# Note that this hack is only possible on numpy 1.x:
# in 2.x, we cannot write directly to the data attribute
# https://github.com/numpy/numpy/issues/8628
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=DeprecationWarning)
arr.data = self[idx].data.data
elif sys.getrefcount(arr) > 2:
# 2 is the minimum number of references to this object
# counting `arr` and a reference as an argument to getrefcount(),
# see https://docs.python.org/3/library/sys.html#sys.getrefcount
warnings.warn(
"Memory map object was closed but appears to still "
"be referenced. Further access will result in undefined "
"behavior (possibly including segmentation faults).",
category=UserWarning,
stacklevel=2,
)
del mmaps # Just to be sure
else:
# The underlying file is not a file object, it is a file like
# object. We can't write out to a file, we must update the file
# like object in place. To do this, we write out to a temporary
# file, then delete the contents in our file like object, then
# write the contents of the temporary file to the now empty file
# like object.
self.writeto(name)
hdulist = self.fromfile(name)
ffo = self._file
ffo.truncate(0)
ffo.seek(0)
for hdu in hdulist:
hdu._writeto(ffo, inplace=True, copy=True)
# Close the temporary file and delete it.
hdulist.close()
os.remove(hdulist._file.name)
# reset the resize attributes after updating
self._resize = False
self._truncate = False
for hdu in self:
hdu._header._modified = False
hdu._new = False
hdu._file = ffo
def _wasresized(self, verbose=False):
"""
Determine if any changes to the HDUList will require a file resize
when flushing the file.
Side effect of setting the objects _resize attribute.
"""
if not self._resize:
# determine if any of the HDU is resized
for hdu in self:
# for CompImageHDU, we need to handle things a little differently
# because the HDU matching the header/data on disk is hdu._bintable
if isinstance(hdu, CompImageHDU):
hdu = hdu._tmp_bintable
if hdu is None:
continue
# Header:
nbytes = len(str(hdu._header))
if nbytes != (hdu._data_offset - hdu._header_offset):
self._resize = True
self._truncate = False
if verbose:
print("One or more header is resized.")
break
# Data:
if not hdu._has_data:
continue
nbytes = hdu.size
nbytes = nbytes + _pad_length(nbytes)
if nbytes != hdu._data_size:
self._resize = True
self._truncate = False
if verbose:
print("One or more data area is resized.")
break
if self._truncate:
try:
self._file.truncate(hdu._data_offset + hdu._data_size)
except OSError:
self._resize = True
self._truncate = False
return self._resize