# Licensed under a 3-clause BSD style license - see PYFITS.rst
import mmap
import sys
import warnings
import numpy as np
from astropy.io.fits.header import Header
from astropy.io.fits.util import (
_is_dask_array,
_is_int,
_is_pseudo_integer,
_pseudo_zero,
)
from astropy.io.fits.verify import VerifyWarning
from astropy.utils import isiterable, lazyproperty
from .base import BITPIX2DTYPE, DELAYED, DTYPE2BITPIX, ExtensionHDU, _ValidHDU
__all__ = ["ImageHDU", "PrimaryHDU", "Section"]
class _ImageBaseHDU(_ValidHDU):
"""FITS image HDU base class.
Attributes
----------
header
image header
data
image data
"""
standard_keyword_comments = {
"SIMPLE": "conforms to FITS standard",
"XTENSION": "Image extension",
"BITPIX": "array data type",
"NAXIS": "number of array dimensions",
"GROUPS": "has groups",
"PCOUNT": "number of parameters",
"GCOUNT": "number of groups",
}
def __init__(
self,
data=None,
header=None,
do_not_scale_image_data=False,
uint=True,
scale_back=False,
ignore_blank=False,
**kwargs,
):
from .groups import GroupsHDU
super().__init__(data=data, header=header)
if data is DELAYED:
# Presumably if data is DELAYED then this HDU is coming from an
# open file, and was not created in memory
if header is None:
# this should never happen
raise ValueError("No header to setup HDU.")
else:
# TODO: Some of this card manipulation should go into the
# PrimaryHDU and GroupsHDU subclasses
# construct a list of cards of minimal header
if isinstance(self, ExtensionHDU):
c0 = ("XTENSION", "IMAGE", self.standard_keyword_comments["XTENSION"])
else:
c0 = ("SIMPLE", True, self.standard_keyword_comments["SIMPLE"])
cards = [
c0,
("BITPIX", 8, self.standard_keyword_comments["BITPIX"]),
("NAXIS", 0, self.standard_keyword_comments["NAXIS"]),
]
if isinstance(self, GroupsHDU):
cards.append(("GROUPS", True, self.standard_keyword_comments["GROUPS"]))
if isinstance(self, (ExtensionHDU, GroupsHDU)):
cards.append(("PCOUNT", 0, self.standard_keyword_comments["PCOUNT"]))
cards.append(("GCOUNT", 1, self.standard_keyword_comments["GCOUNT"]))
if header is not None:
orig = header.copy()
header = Header(cards)
header.extend(orig, strip=True, update=True, end=True)
else:
header = Header(cards)
self._header = header
self._do_not_scale_image_data = do_not_scale_image_data
self._uint = uint
self._scale_back = scale_back
# Keep track of whether BZERO/BSCALE were set from the header so that
# values for self._orig_bzero and self._orig_bscale can be set
# properly, if necessary, once the data has been set.
bzero_in_header = "BZERO" in self._header
bscale_in_header = "BSCALE" in self._header
self._bzero = self._header.get("BZERO", 0)
self._bscale = self._header.get("BSCALE", 1)
# Save off other important values from the header needed to interpret
# the image data
self._axes = [
self._header.get("NAXIS" + str(axis + 1), 0)
for axis in range(self._header.get("NAXIS", 0))
]
# Not supplying a default for BITPIX makes sense because BITPIX
# is either in the header or should be determined from the dtype of
# the data (which occurs when the data is set).
self._bitpix = self._header.get("BITPIX")
self._gcount = self._header.get("GCOUNT", 1)
self._pcount = self._header.get("PCOUNT", 0)
self._blank = None if ignore_blank else self._header.get("BLANK")
self._verify_blank()
self._orig_bitpix = self._bitpix
self._orig_blank = self._header.get("BLANK")
# These get set again below, but need to be set to sensible defaults
# here.
self._orig_bzero = self._bzero
self._orig_bscale = self._bscale
# Set the name attribute if it was provided (if this is an ImageHDU
# this will result in setting the EXTNAME keyword of the header as
# well)
if kwargs.get("name"):
self.name = kwargs["name"]
if kwargs.get("ver"):
self.ver = kwargs["ver"]
# Set to True if the data or header is replaced, indicating that
# update_header should be called
self._modified = False
if data is DELAYED:
if not do_not_scale_image_data and (self._bscale != 1 or self._bzero != 0):
# This indicates that when the data is accessed or written out
# to a new file it will need to be rescaled
self._data_needs_rescale = True
return
else:
# Setting data will update the header and set _bitpix, _bzero,
# and _bscale to the appropriate BITPIX for the data, and always
# sets _bzero=0 and _bscale=1.
self.data = data
# Check again for BITPIX/BSCALE/BZERO in case they changed when the
# data was assigned. This can happen, for example, if the input
# data is an unsigned int numpy array.
self._bitpix = self._header.get("BITPIX")
# Do not provide default values for BZERO and BSCALE here because
# the keywords will have been deleted in the header if appropriate
# after scaling. We do not want to put them back in if they
# should not be there.
self._bzero = self._header.get("BZERO")
self._bscale = self._header.get("BSCALE")
# Handle case where there was no BZERO/BSCALE in the initial header
# but there should be a BSCALE/BZERO now that the data has been set.
if not bzero_in_header:
self._orig_bzero = self._bzero
if not bscale_in_header:
self._orig_bscale = self._bscale
@classmethod
def match_header(cls, header):
"""
_ImageBaseHDU is sort of an abstract class for HDUs containing image
data (as opposed to table data) and should never be used directly.
"""
raise NotImplementedError
@property
def is_image(self):
return True
@property
def section(self):
"""
Access a section of the image array without loading the entire array
into memory. The :class:`Section` object returned by this attribute is
        not meant to be used directly by itself. Rather, slices of the section
        return the appropriate slice of the data, loading *only* that section
        into memory.
Sections are useful for retrieving a small subset of data from a remote
file that has been opened with the ``use_fsspec=True`` parameter.
For example, you can use this feature to download a small cutout from
a large FITS image hosted in the Amazon S3 cloud (see the
:ref:`astropy:fits-cloud-files` section of the Astropy
        documentation for more details).
For local files, sections are mostly obsoleted by memmap support, but
should still be used to deal with very large scaled images.
Note that sections cannot currently be written to. Moreover, any
in-memory updates to the image's ``.data`` property may not be
reflected in the slices obtained via ``.section``. See the
:ref:`astropy:data-sections` section of the documentation for
more details.
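
        Examples
        --------
        A minimal sketch of a remote cutout; the S3 URI below is hypothetical
        and the read is skipped at doctest time::

            >>> from astropy.io import fits
            >>> uri = "s3://bucket/large_image.fits"  # hypothetical location
            >>> with fits.open(uri, use_fsspec=True) as hdul:  # doctest: +SKIP
            ...     cutout = hdul[0].section[10:20, 30:40]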
"""
return Section(self)
@property
def shape(self):
"""
Shape of the image array--should be equivalent to ``self.data.shape``.
"""
# Determine from the values read from the header
return tuple(reversed(self._axes))
@property
def header(self):
return self._header
@header.setter
def header(self, header):
self._header = header
self._modified = True
self.update_header()
@lazyproperty
def data(self):
"""
Image/array data as a `~numpy.ndarray`.
        Please remember that the order of axes on a Numpy array is the opposite
        of the order specified in the FITS file. For example, for a 2D image
        the "rows" or y-axis are the first dimension, and the "columns" or
        x-axis are the second dimension.
If the data is scaled using the BZERO and BSCALE parameters, this
attribute returns the data scaled to its physical values unless the
file was opened with ``do_not_scale_image_data=True``.
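
        Examples
        --------
        A minimal sketch of the axis-order convention::

            >>> import numpy as np
            >>> from astropy.io import fits
            >>> hdu = fits.PrimaryHDU(data=np.zeros((100, 200)))  # 100 rows, 200 cols
            >>> hdu.header["NAXIS1"], hdu.header["NAXIS2"]
            (200, 100)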
"""
if len(self._axes) < 1:
return
data = self._get_scaled_image_data(self._data_offset, self.shape)
self._update_header_scale_info(data.dtype)
return data
@data.setter
def data(self, data):
if "data" in self.__dict__ and self.__dict__["data"] is not None:
if self.__dict__["data"] is data:
return
else:
self._data_replaced = True
was_unsigned = _is_pseudo_integer(self.__dict__["data"].dtype)
else:
self._data_replaced = True
was_unsigned = False
if data is not None:
if not isinstance(data, np.ndarray) and not _is_dask_array(data):
# Try to coerce the data into a numpy array--this will work, on
# some level, for most objects
try:
data = np.array(data)
except Exception: # pragma: no cover
raise TypeError(
f"data object {data!r} could not be coerced into an ndarray"
)
if data.shape == ():
raise TypeError(
f"data object {data!r} should have at least one dimension"
)
self.__dict__["data"] = data
self._modified = True
if data is None:
self._axes = []
else:
# Set new values of bitpix, bzero, and bscale now, but wait to
# revise original values until header is updated.
self._bitpix = DTYPE2BITPIX[data.dtype.name]
self._bscale = 1
self._bzero = 0
self._blank = None
self._axes = list(data.shape)
self._axes.reverse()
# Update the header, including adding BZERO/BSCALE if new data is
# unsigned. Does not change the values of self._bitpix,
# self._orig_bitpix, etc.
self.update_header()
if data is not None and was_unsigned:
self._update_header_scale_info(data.dtype)
# Keep _orig_bitpix as it was until header update is done, then
# set it, to allow easier handling of the case of unsigned
# integer data being converted to something else. Setting these here
# is needed only for the case do_not_scale_image_data=True when
# setting the data to unsigned int.
# If necessary during initialization, i.e. if BSCALE and BZERO were
# not in the header but the data was unsigned, the attributes below
        # will be updated in __init__.
self._orig_bitpix = self._bitpix
self._orig_bscale = self._bscale
self._orig_bzero = self._bzero
# returning the data signals to lazyproperty that we've already handled
# setting self.__dict__['data']
return data
@property
def _data_shape(self):
return self.data.shape
def update_header(self):
"""
Update the header keywords to agree with the data.
"""
if not (
self._modified
or self._header._modified
or (self._has_data and self.shape != self._data_shape)
):
# Not likely that anything needs updating
return
old_naxis = self._header.get("NAXIS", 0)
if "BITPIX" not in self._header:
bitpix_comment = self.standard_keyword_comments["BITPIX"]
else:
bitpix_comment = self._header.comments["BITPIX"]
# Update the BITPIX keyword and ensure it's in the correct
# location in the header
self._header.set("BITPIX", self._bitpix, bitpix_comment, after=0)
# If the data's shape has changed (this may have happened without our
# noticing either via a direct update to the data.shape attribute) we
# need to update the internal self._axes
if self._has_data and self.shape != self._data_shape:
self._axes = list(self._data_shape)
self._axes.reverse()
# Update the NAXIS keyword and ensure it's in the correct location in
# the header
if "NAXIS" in self._header:
naxis_comment = self._header.comments["NAXIS"]
else:
naxis_comment = self.standard_keyword_comments["NAXIS"]
self._header.set("NAXIS", len(self._axes), naxis_comment, after="BITPIX")
# TODO: This routine is repeated in several different classes--it
# should probably be made available as a method on all standard HDU
# types
# add NAXISi if it does not exist
for idx, axis in enumerate(self._axes):
naxisn = "NAXIS" + str(idx + 1)
if naxisn in self._header:
self._header[naxisn] = axis
else:
if idx == 0:
after = "NAXIS"
else:
after = "NAXIS" + str(idx)
self._header.set(naxisn, axis, after=after)
# delete extra NAXISi's
for idx in range(len(self._axes) + 1, old_naxis + 1):
self._header.remove(f"NAXIS{idx}", ignore_missing=True)
if "BLANK" in self._header:
self._blank = self._header["BLANK"]
# Add BSCALE/BZERO to header if data is unsigned int.
self._update_pseudo_int_scale_keywords()
self._modified = False
def _update_header_scale_info(self, dtype=None):
"""
Delete BSCALE/BZERO from header if necessary.
"""
# Note that _dtype_for_bitpix determines the dtype based on the
# "original" values of bitpix, bscale, and bzero, stored in
# self._orig_bitpix, etc. It contains the logic for determining which
# special cases of BZERO/BSCALE, if any, are auto-detected as following
# the FITS unsigned int convention.
# Added original_was_unsigned with the intent of facilitating the
# special case of do_not_scale_image_data=True and uint=True
# eventually.
# FIXME: unused, maybe it should be useful?
# if self._dtype_for_bitpix() is not None:
# original_was_unsigned = self._dtype_for_bitpix().kind == 'u'
# else:
# original_was_unsigned = False
if self._do_not_scale_image_data or (
self._orig_bzero == 0 and self._orig_bscale == 1
):
return
if dtype is None:
dtype = self._dtype_for_bitpix()
if (
dtype is not None
and dtype.kind == "u"
and (self._scale_back or self._scale_back is None)
):
# Data is pseudo-unsigned integers, and the scale_back option
# was not explicitly set to False, so preserve all the scale
# factors
return
for keyword in ("BSCALE", "BZERO"):
try:
del self._header[keyword]
# Since _update_header_scale_info can, currently, be called
# *after* _prewriteto(), replace these with blank cards so
# the header size doesn't change
self._header.append()
except KeyError:
pass
if dtype is None:
dtype = self._dtype_for_bitpix()
if dtype is not None:
self._header["BITPIX"] = DTYPE2BITPIX[dtype.name]
self._bzero = 0
self._bscale = 1
self._bitpix = self._header["BITPIX"]
self._blank = self._header.pop("BLANK", None)
def scale(self, type=None, option="old", bscale=None, bzero=None):
"""
Scale image data by using ``BSCALE``/``BZERO``.
        Calling this method will scale ``data`` and update the ``BSCALE`` and
        ``BZERO`` keywords in the HDU's header. This method should only
        be used right before writing to the output file, as the data will be
        scaled and is therefore not very usable after the call.
Parameters
----------
type : str, optional
destination data type, use a string representing a numpy
            dtype name (e.g. ``'uint8'``, ``'int16'``, ``'float32'``,
            etc.). If `None`, use the current data type.
option : str, optional
How to scale the data: ``"old"`` uses the original ``BSCALE`` and
``BZERO`` values from when the data was read/created (defaulting to
1 and 0 if they don't exist). For integer data only, ``"minmax"``
uses the minimum and maximum of the data to scale. User-specified
``bscale``/``bzero`` values always take precedence.
bscale, bzero : int, optional
User-specified ``BSCALE`` and ``BZERO`` values
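
        Examples
        --------
        A minimal sketch; note that ``scale`` modifies the data in place and
        should be called just before writing::

            >>> import numpy as np
            >>> from astropy.io import fits
            >>> hdu = fits.PrimaryHDU(data=np.array([0.0, 1.0, 2.0]))
            >>> hdu.scale("int16", bscale=0.5)
            >>> hdu.header["BITPIX"], hdu.header["BSCALE"]
            (16, 0.5)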
"""
# Disable blank support for now
self._scale_internal(
type=type, option=option, bscale=bscale, bzero=bzero, blank=None
)
def _scale_internal(
self, type=None, option="old", bscale=None, bzero=None, blank=0
):
"""
This is an internal implementation of the `scale` method, which
also supports handling BLANK properly.
TODO: This is only needed for fixing #3865 without introducing any
public API changes. We should support BLANK better when rescaling
data, and when that is added the need for this internal interface
should go away.
Note: the default of ``blank=0`` merely reflects the current behavior,
and is not necessarily a deliberate choice (better would be to disallow
conversion of floats to ints without specifying a BLANK if there are
NaN/inf values).
"""
if self.data is None:
return
# Determine the destination (numpy) data type
if type is None:
type = BITPIX2DTYPE[self._bitpix]
_type = getattr(np, type)
# Determine how to scale the data
        # bscale and bzero take priority
if bscale is not None and bzero is not None:
_scale = bscale
_zero = bzero
elif bscale is not None:
_scale = bscale
_zero = 0
elif bzero is not None:
_scale = 1
_zero = bzero
elif (
option == "old"
and self._orig_bscale is not None
and self._orig_bzero is not None
):
_scale = self._orig_bscale
_zero = self._orig_bzero
elif option == "minmax" and not issubclass(_type, np.floating):
if _is_dask_array(self.data):
min = self.data.min().compute()
max = self.data.max().compute()
else:
min = np.minimum.reduce(self.data.flat)
max = np.maximum.reduce(self.data.flat)
if _type == np.uint8: # uint8 case
_zero = min
_scale = (max - min) / (2.0**8 - 1)
else:
_zero = (max + min) / 2.0
                # Use 2**nbits - 2 levels, discarding the most negative code
                # value (-2**(nbits - 1)) so the scaled range is symmetric
                # about _zero
                nbits = 8 * _type().itemsize
                _scale = (max - min) / (2.0**nbits - 2)
else:
_scale = 1
_zero = 0
# Do the scaling
if _zero != 0:
if _is_dask_array(self.data):
self.data = self.data - _zero
else:
                # Cast explicitly (added in PyFITS 0.9.6.3) to avoid an
                # out-of-range error for BZERO = +32768.
# We have to explicitly cast _zero to prevent numpy from raising an
# error when doing self.data -= zero, and we do this instead of
# self.data = self.data - zero to avoid doubling memory usage.
self.data -= np.array(_zero).astype(self.data.dtype, casting="unsafe")
self._header["BZERO"] = _zero
else:
self._header.remove("BZERO", ignore_missing=True)
if _scale and _scale != 1:
self.data = self.data / _scale
self._header["BSCALE"] = _scale
else:
self._header.remove("BSCALE", ignore_missing=True)
# Set blanks
if blank is not None and issubclass(_type, np.integer):
# TODO: Perhaps check that the requested BLANK value fits in the
# integer type being scaled to?
self.data[np.isnan(self.data)] = blank
self._header["BLANK"] = blank
if self.data.dtype.type != _type:
if issubclass(_type, np.floating):
self.data = np.array(self.data, dtype=_type)
else:
self.data = np.array(np.around(self.data), dtype=_type)
# Update the BITPIX Card to match the data
self._bitpix = DTYPE2BITPIX[self.data.dtype.name]
self._bzero = self._header.get("BZERO", 0)
self._bscale = self._header.get("BSCALE", 1)
self._blank = blank
self._header["BITPIX"] = self._bitpix
# Since the image has been manually scaled, the current
# bitpix/bzero/bscale now serve as the 'original' scaling of the image,
# as though the original image has been completely replaced
self._orig_bitpix = self._bitpix
self._orig_bzero = self._bzero
self._orig_bscale = self._bscale
self._orig_blank = self._blank
def _verify(self, option="warn"):
# update_header can fix some things that would otherwise cause
# verification to fail, so do that now...
self.update_header()
self._verify_blank()
return super()._verify(option)
def _verify_blank(self):
# Probably not the best place for this (it should probably happen
# in _verify as well) but I want to be able to raise this warning
# both when the HDU is created and when written
if self._blank is None:
return
messages = []
        # TODO: Once the FITSSchema framework is merged these warnings
# should be handled by the schema
if not _is_int(self._blank):
messages.append(
f"Invalid value for 'BLANK' keyword in header: {self._blank!r} "
"The 'BLANK' keyword must be an integer. It will be "
"ignored in the meantime."
)
self._blank = None
if not self._bitpix > 0:
messages.append(
"Invalid 'BLANK' keyword in header. The 'BLANK' keyword "
"is only applicable to integer data, and will be ignored "
"in this HDU."
)
self._blank = None
for msg in messages:
warnings.warn(msg, VerifyWarning)
def _prewriteto(self, checksum=False, inplace=False):
if self._scale_back:
self._scale_internal(
BITPIX2DTYPE[self._orig_bitpix], blank=self._orig_blank
)
self.update_header()
if not inplace and self._data_needs_rescale:
# Go ahead and load the scaled image data and update the header
# with the correct post-rescaling headers
_ = self.data
return super()._prewriteto(checksum, inplace)
def _writedata_internal(self, fileobj):
size = 0
if self.data is None:
return size
elif _is_dask_array(self.data):
return self._writeinternal_dask(fileobj)
else:
# Based on the system type, determine the byteorders that
# would need to be swapped to get to big-endian output
if sys.byteorder == "little":
swap_types = ("<", "=")
else:
swap_types = ("<",)
# deal with unsigned integer 16, 32 and 64 data
if _is_pseudo_integer(self.data.dtype):
# Convert the unsigned array to signed
output = np.array(
self.data - _pseudo_zero(self.data.dtype),
dtype=f">i{self.data.dtype.itemsize}",
)
should_swap = False
else:
output = self.data
byteorder = output.dtype.str[0]
should_swap = byteorder in swap_types
if should_swap:
if output.flags.writeable:
output.byteswap(True)
try:
fileobj.writearray(output)
finally:
output.byteswap(True)
else:
# For read-only arrays, there is no way around making
# a byteswapped copy of the data.
fileobj.writearray(output.byteswap(False))
else:
fileobj.writearray(output)
size += output.size * output.itemsize
return size
def _writeinternal_dask(self, fileobj):
if sys.byteorder == "little":
swap_types = ("<", "=")
else:
swap_types = ("<",)
# deal with unsigned integer 16, 32 and 64 data
if _is_pseudo_integer(self.data.dtype):
raise NotImplementedError("This dtype isn't currently supported with dask.")
else:
output = self.data
byteorder = output.dtype.str[0]
should_swap = byteorder in swap_types
if should_swap:
from dask.utils import M
# NOTE: the inplace flag to byteswap needs to be False otherwise the array is
# byteswapped in place every time it is computed and this affects
# the input dask array.
output = output.map_blocks(M.byteswap, False)
output = output.view(output.dtype.newbyteorder("S"))
initial_position = fileobj.tell()
n_bytes = output.nbytes
# Extend the file n_bytes into the future
fileobj.seek(initial_position + n_bytes - 1)
fileobj.write(b"\0")
fileobj.flush()
if fileobj.fileobj_mode not in ("rb+", "wb+", "ab+"):
# Use another file handle if the current one is not in
# read/write mode
fp = open(fileobj.name, mode="rb+")
should_close = True
else:
fp = fileobj._file
should_close = False
try:
outmmap = mmap.mmap(
fp.fileno(), length=initial_position + n_bytes, access=mmap.ACCESS_WRITE
)
outarr = np.ndarray(
shape=output.shape,
dtype=output.dtype,
offset=initial_position,
buffer=outmmap,
)
output.store(outarr, lock=True, compute=True)
finally:
if should_close:
fp.close()
outmmap.close()
# On Windows closing the memmap causes the file pointer to return to 0, so
# we need to go back to the end of the data (since padding may be written
# after)
fileobj.seek(initial_position + n_bytes)
return n_bytes
def _dtype_for_bitpix(self):
"""
Determine the dtype that the data should be converted to depending on
the BITPIX value in the header, and possibly on the BSCALE value as
well. Returns None if there should not be any change.
"""
bitpix = self._orig_bitpix
# Handle possible conversion to uints if enabled
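        # e.g. BITPIX = 16 with BZERO = 32768 and BSCALE = 1 follows the FITS
        # unsigned-integer convention and maps to uint16 below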
if self._uint and self._orig_bscale == 1:
if bitpix == 8 and self._orig_bzero == -128:
return np.dtype("int8")
for bits, dtype in (
(16, np.dtype("uint16")),
(32, np.dtype("uint32")),
(64, np.dtype("uint64")),
):
if bitpix == bits and self._orig_bzero == 1 << (bits - 1):
return dtype
if bitpix > 16: # scale integers to Float64
return np.dtype("float64")
elif bitpix > 0: # scale integers to Float32
return np.dtype("float32")
def _convert_pseudo_integer(self, data):
"""
Handle "pseudo-unsigned" integers, if the user requested it. Returns
the converted data array if so; otherwise returns None.
        In this case, we don't need to convert BLANK values to NaN, since NaN
        does not exist for integer types; the user is responsible for managing
        blanks.
"""
dtype = self._dtype_for_bitpix()
# bool(dtype) is always False--have to explicitly compare to None; this
# caused a fair amount of hair loss
if dtype is not None and dtype.kind == "u":
# Convert the input raw data into an unsigned integer array and
# then scale the data adjusting for the value of BZERO. Note that
# we subtract the value of BZERO instead of adding because of the
# way numpy converts the raw signed array into an unsigned array.
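            # e.g. a raw int16 value of -32768 becomes 32768 when first cast
            # to uint16; subtracting BZERO = 1 << 15 (with unsigned wraparound
            # where needed) then yields the physical value 0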
bits = dtype.itemsize * 8
data = np.array(data, dtype=dtype)
data -= np.uint64(1 << (bits - 1))
return data
def _get_scaled_image_data(self, offset, shape):
"""
        Internal function for reading image data from a file and applying scale
factors to it. Normally this is used for the entire image, but it
supports alternate offset/shape for Section support.
"""
code = BITPIX2DTYPE[self._orig_bitpix]
raw_data = self._get_raw_data(shape, code, offset)
raw_data.dtype = raw_data.dtype.newbyteorder(">")
return self._scale_data(raw_data)
def _scale_data(self, raw_data):
if self._do_not_scale_image_data or (
self._orig_bzero == 0 and self._orig_bscale == 1 and self._blank is None
):
# No further conversion of the data is necessary
return raw_data
try:
if self._file.strict_memmap:
raise ValueError(
"Cannot load a memory-mapped image: "
"BZERO/BSCALE/BLANK header keywords present. "
"Set memmap=False."
)
except AttributeError: # strict_memmap not set
pass
data = None
if not (self._orig_bzero == 0 and self._orig_bscale == 1):
data = self._convert_pseudo_integer(raw_data)
if data is None:
# In these cases, we end up with floating-point arrays and have to
# apply bscale and bzero. We may have to handle BLANK and convert
# to NaN in the resulting floating-point arrays.
# The BLANK keyword should only be applied for integer data (this
# is checked in __init__ but it can't hurt to double check here)
blanks = None
if self._blank is not None and self._bitpix > 0:
blanks = raw_data.flat == self._blank
# The size of blanks in bytes is the number of elements in
# raw_data.flat. However, if we use np.where instead we will
# only use 8 bytes for each index where the condition is true.
# So if the number of blank items is fewer than
# len(raw_data.flat) / 8, using np.where will use less memory
if blanks.sum() < len(blanks) / 8:
blanks = np.where(blanks)
new_dtype = self._dtype_for_bitpix()
if new_dtype is not None:
data = np.array(raw_data, dtype=new_dtype)
else: # floating point cases
if self._file is not None and self._file.memmap:
data = raw_data.copy()
elif not raw_data.flags.writeable:
# create a writeable copy if needed
data = raw_data.copy()
# if not memmap, use the space already in memory
else:
data = raw_data
del raw_data
if self._orig_bscale != 1:
np.multiply(data, self._orig_bscale, data)
if self._orig_bzero != 0:
data += self._orig_bzero
            # Use the mask computed above rather than testing the truthiness
            # of self._blank, which would wrongly skip BLANK = 0
            if blanks is not None:
data.flat[blanks] = np.nan
return data
def _summary(self):
"""
Summarize the HDU: name, dimensions, and formats.
"""
class_name = self.__class__.__name__
# if data is touched, use data info.
if self._data_loaded:
if self.data is None:
format = ""
else:
format = self.data.dtype.name
format = format[format.rfind(".") + 1 :]
else:
if self.shape and all(self.shape):
# Only show the format if all the dimensions are non-zero
# if data is not touched yet, use header info.
format = BITPIX2DTYPE[self._bitpix]
else:
format = ""
if (
format
and not self._do_not_scale_image_data
and (self._orig_bscale != 1 or self._orig_bzero != 0)
):
new_dtype = self._dtype_for_bitpix()
if new_dtype is not None:
format += f" (rescales to {new_dtype.name})"
# Display shape in FITS-order
shape = tuple(reversed(self.shape))
return (self.name, self.ver, class_name, len(self._header), shape, format, "")
def _calculate_datasum(self):
"""
Calculate the value for the ``DATASUM`` card in the HDU.
"""
if self._has_data:
# We have the data to be used.
d = self.data
# First handle the special case where the data is unsigned integer
# 16, 32 or 64
if _is_pseudo_integer(self.data.dtype):
d = np.array(
self.data - _pseudo_zero(self.data.dtype),
dtype=f"i{self.data.dtype.itemsize}",
)
# Check the byte order of the data. If it is little endian we
# must swap it before calculating the datasum.
if d.dtype.str[0] != ">":
if d.flags.writeable:
byteswapped = True
d = d.byteswap(True)
d.dtype = d.dtype.newbyteorder(">")
else:
# If the data is not writeable, we just make a byteswapped
# copy and don't bother changing it back after
d = d.byteswap(False)
d.dtype = d.dtype.newbyteorder(">")
byteswapped = False
else:
byteswapped = False
cs = self._compute_checksum(d.ravel().view(np.uint8))
# If the data was byteswapped in this method then return it to
# its original little-endian order.
if byteswapped and not _is_pseudo_integer(self.data.dtype):
d.byteswap(True)
d.dtype = d.dtype.newbyteorder("<")
return cs
else:
# This is the case where the data has not been read from the file
# yet. We can handle that in a generic manner so we do it in the
# base class. The other possibility is that there is no data at
# all. This can also be handled in a generic manner.
return super()._calculate_datasum()
class Section:
"""
Class enabling subsets of ImageHDU data to be loaded lazily via slicing.
    Slices of this object load the corresponding section of an image array
    from the underlying FITS file and apply any BSCALE/BZERO factors.
Section slices cannot be assigned to, and modifications to a section are
not saved back to the underlying file.
See the :ref:`astropy:data-sections` section of the Astropy documentation
for more details.
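
    Examples
    --------
    A minimal sketch (the file name is hypothetical)::

        >>> from astropy.io import fits
        >>> with fits.open("large_image.fits") as hdul:  # doctest: +SKIP
        ...     cutout = hdul[0].section[200:300, 400:500]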
"""
def __init__(self, hdu):
self.hdu = hdu
@property
def shape(self):
# Implementing `.shape` enables `astropy.nddata.Cutout2D` to accept
# `ImageHDU.section` in place of `.data`.
return self.hdu.shape
def __getitem__(self, key):
"""Returns a slice of HDU data specified by `key`.
If the image HDU is backed by a file handle, this method will only read
the chunks of the file needed to extract `key`, which is useful in
situations where the file is located on a slow or remote file system
(e.g., cloud storage).
"""
if not isinstance(key, tuple):
key = (key,)
naxis = len(self.hdu.shape)
return_scalar = (
all(isinstance(k, (int, np.integer)) for k in key) and len(key) == naxis
)
if not any(k is Ellipsis for k in key):
# We can always add a ... at the end, after making note of whether
# to return a scalar.
key += (Ellipsis,)
ellipsis_count = len([k for k in key if k is Ellipsis])
if len(key) - ellipsis_count > naxis or ellipsis_count > 1:
raise IndexError("too many indices for array")
# Insert extra dimensions as needed.
idx = next(i for i, k in enumerate(key + (Ellipsis,)) if k is Ellipsis)
key = key[:idx] + (slice(None),) * (naxis - len(key) + 1) + key[idx + 1 :]
return_0dim = (
all(isinstance(k, (int, np.integer)) for k in key) and len(key) == naxis
)
dims = []
offset = 0
# Find all leading axes for which a single point is used.
for idx in range(naxis):
axis = self.hdu.shape[idx]
indx = _IndexInfo(key[idx], axis)
offset = offset * axis + indx.offset
if not _is_int(key[idx]):
dims.append(indx.npts)
break
is_contiguous = indx.contiguous
for jdx in range(idx + 1, naxis):
axis = self.hdu.shape[jdx]
indx = _IndexInfo(key[jdx], axis)
dims.append(indx.npts)
if indx.npts == axis and indx.contiguous:
# The offset needs to multiply the length of all remaining axes
offset *= axis
else:
is_contiguous = False
if is_contiguous:
dims = tuple(dims) or (1,)
bitpix = self.hdu._orig_bitpix
offset = self.hdu._data_offset + offset * abs(bitpix) // 8
# Note: the actual file read operations are delegated to
# `util._array_from_file` via `ImageHDU._get_scaled_image_data`
data = self.hdu._get_scaled_image_data(offset, dims)
else:
data = self._getdata(key)
if return_scalar:
data = data.item()
elif return_0dim:
data = data.squeeze()
return data
def _getdata(self, keys):
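        """
        Read a non-contiguous section by recursing through ``__getitem__``
        along the first axis indexed by a slice or iterable, then combine the
        resulting pieces.
        """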
for idx, (key, axis) in enumerate(zip(keys, self.hdu.shape)):
if isinstance(key, slice):
ks = range(*key.indices(axis))
break
if isiterable(key):
# Handle both integer and boolean arrays.
ks = np.arange(axis, dtype=int)[key]
break
# This should always break at some point if _getdata is called.
data = [self[keys[:idx] + (k,) + keys[idx + 1 :]] for k in ks]
if any(isinstance(key, slice) or isiterable(key) for key in keys[idx + 1 :]):
# data contains multidimensional arrays; combine them.
return np.array(data)
else:
# Only singleton dimensions remain; concatenate in a 1D array.
return np.concatenate([np.atleast_1d(array) for array in data])
class PrimaryHDU(_ImageBaseHDU):
"""
FITS primary HDU class.
"""
_default_name = "PRIMARY"
def __init__(
self,
data=None,
header=None,
do_not_scale_image_data=False,
ignore_blank=False,
uint=True,
scale_back=None,
):
"""
Construct a primary HDU.
Parameters
----------
data : array or ``astropy.io.fits.hdu.base.DELAYED``, optional
The data in the HDU.
header : `~astropy.io.fits.Header`, optional
The header to be used (as a template). If ``header`` is `None`, a
minimal header will be provided.
do_not_scale_image_data : bool, optional
If `True`, image data is not scaled using BSCALE/BZERO values
when read. (default: False)
ignore_blank : bool, optional
If `True`, the BLANK header keyword will be ignored if present.
Otherwise, pixels equal to this value will be replaced with
NaNs. (default: False)
uint : bool, optional
Interpret signed integer data where ``BZERO`` is the
central value and ``BSCALE == 1`` as unsigned integer
data. For example, ``int16`` data with ``BZERO = 32768``
and ``BSCALE = 1`` would be treated as ``uint16`` data.
(default: True)
scale_back : bool, optional
If `True`, when saving changes to a file that contained scaled
image data, restore the data to the original type and reapply the
original BSCALE/BZERO values. This could lead to loss of accuracy
if scaling back to integer values after performing floating point
operations on the data. Pseudo-unsigned integers are automatically
rescaled unless scale_back is explicitly set to `False`.
(default: None)
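
        Examples
        --------
        A minimal sketch::

            >>> import numpy as np
            >>> from astropy.io import fits
            >>> hdu = fits.PrimaryHDU(data=np.zeros((10, 10), dtype=np.float32))
            >>> hdu.header["SIMPLE"], hdu.header["BITPIX"], hdu.header["NAXIS"]
            (True, -32, 2)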
"""
super().__init__(
data=data,
header=header,
do_not_scale_image_data=do_not_scale_image_data,
uint=uint,
ignore_blank=ignore_blank,
scale_back=scale_back,
)
        # insert the EXTEND keyword
if header is None:
dim = self._header["NAXIS"]
if dim == 0:
dim = ""
self._header.set("EXTEND", True, after="NAXIS" + str(dim))
def _verify(self, option="warn"):
errs = super()._verify(option=option)
# Verify location and value of mandatory keywords.
# The EXTEND keyword is only mandatory if the HDU has extensions; this
# condition is checked by the HDUList object. However, if we already
# have an EXTEND keyword check that its position is correct
if "EXTEND" in self._header:
naxis = self._header.get("NAXIS", 0)
self.req_cards(
"EXTEND", naxis + 3, lambda v: isinstance(v, bool), True, option, errs
)
return errs
class ImageHDU(_ImageBaseHDU, ExtensionHDU):
"""
FITS image extension HDU class.
"""
_extension = "IMAGE"
def __init__(
self,
data=None,
header=None,
name=None,
do_not_scale_image_data=False,
uint=True,
scale_back=None,
ver=None,
):
"""
Construct an image HDU.
Parameters
----------
data : array
The data in the HDU.
header : `~astropy.io.fits.Header`
The header to be used (as a template). If ``header`` is
`None`, a minimal header will be provided.
name : str, optional
The name of the HDU, will be the value of the keyword
``EXTNAME``.
do_not_scale_image_data : bool, optional
If `True`, image data is not scaled using BSCALE/BZERO values
when read. (default: False)
uint : bool, optional
Interpret signed integer data where ``BZERO`` is the
central value and ``BSCALE == 1`` as unsigned integer
data. For example, ``int16`` data with ``BZERO = 32768``
and ``BSCALE = 1`` would be treated as ``uint16`` data.
(default: True)
scale_back : bool, optional
If `True`, when saving changes to a file that contained scaled
image data, restore the data to the original type and reapply the
original BSCALE/BZERO values. This could lead to loss of accuracy
if scaling back to integer values after performing floating point
operations on the data. Pseudo-unsigned integers are automatically
rescaled unless scale_back is explicitly set to `False`.
(default: None)
ver : int > 0 or None, optional
The ver of the HDU, will be the value of the keyword ``EXTVER``.
If not given or None, it defaults to the value of the ``EXTVER``
card of the ``header`` or 1.
(default: None)
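
        Examples
        --------
        A minimal sketch; the extension name ``'SCI'`` is arbitrary::

            >>> import numpy as np
            >>> from astropy.io import fits
            >>> data = np.arange(6, dtype=np.int16).reshape(2, 3)
            >>> hdu = fits.ImageHDU(data=data, name="SCI")
            >>> hdu.header["XTENSION"], hdu.header["EXTNAME"], hdu.header["NAXIS1"]
            ('IMAGE', 'SCI', 3)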
"""
# This __init__ currently does nothing differently from the base class,
# and is only explicitly defined for the docstring.
super().__init__(
data=data,
header=header,
name=name,
do_not_scale_image_data=do_not_scale_image_data,
uint=uint,
scale_back=scale_back,
ver=ver,
)
def _verify(self, option="warn"):
"""
ImageHDU verify method.
"""
errs = super()._verify(option=option)
naxis = self._header.get("NAXIS", 0)
        # PCOUNT must == 0 and GCOUNT must == 1; PCOUNT is verified in
        # ExtensionHDU._verify, but only as >= 0, so the stricter
        # PCOUNT == 0 check is needed here
self.req_cards(
"PCOUNT", naxis + 3, lambda v: (_is_int(v) and v == 0), 0, option, errs
)
return errs
class _IndexInfo:
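    """
    Describe a single index (integer, slice, or iterable) along one axis of
    length ``naxis``: the number of points selected (``npts``), the starting
    ``offset``, and whether the selection is ``contiguous`` in the file.
    """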
def __init__(self, indx, naxis):
if _is_int(indx):
if indx < 0: # support negative indexing
indx = indx + naxis
if 0 <= indx < naxis:
self.npts = 1
self.offset = indx
self.contiguous = True
else:
raise IndexError(f"Index {indx} out of range.")
elif isinstance(indx, slice):
start, stop, step = indx.indices(naxis)
self.npts = (stop - start) // step
self.offset = start
self.contiguous = step == 1
elif isiterable(indx):
self.npts = len(indx)
self.offset = 0
self.contiguous = False
else:
raise IndexError(f"Illegal index {indx}")