# Source code for astropy.io.fits.hdu.compressed._codecs

"""
This module contains the FITS compression algorithms in numcodecs style Codecs.
"""
from gzip import compress as gzip_compress
from gzip import decompress as gzip_decompress

import numpy as np

from astropy.io.fits.hdu.compressed._compression import (
    compress_hcompress_1_c,
    compress_plio_1_c,
    compress_rice_1_c,
    decompress_hcompress_1_c,
    decompress_plio_1_c,
    decompress_rice_1_c,
)

# If numcodecs is installed, we use Codec as a base class for the codecs below
# so that they can optionally be used as codecs in any package relying on
# numcodecs - however this is optional and if numcodecs is not installed we use
# our own base class. This does not affect any compressed data functionality
# in astropy.io.fits.
try:
    from numcodecs.abc import Codec
except ImportError:

    class Codec:
        # Minimal stand-in used as the codec base class when numcodecs cannot
        # be imported. It only provides the ``codec_id`` class attribute,
        # which every codec defined in this module overrides.
        codec_id = None


# Public API: the codec classes defined in this module.
__all__ = [
    "Gzip1",
    "Gzip2",
    "Rice1",
    "PLIO1",
    "HCompress1",
    "NoCompress",
]


def _as_big_endian_array(data):
    return data.astype(np.asarray(data).dtype.newbyteorder(">"), copy=False)


def _as_native_endian_array(data):
    if data.dtype.isnative:
        return data
    else:
        return data.astype(np.asarray(data).dtype.newbyteorder("="), copy=False)



[docs]
class NoCompress(Codec):
    """
    A dummy compression/decompression algorithm that stores the data as-is.

    While the data is not compressed/decompressed, it is converted to big
    endian during encoding as this is what is expected in FITS files.
    """

    codec_id = "FITS_NOCOMPRESS"

    def decode(self, buf):
        """
        Decompress buffer using the NOCOMPRESS algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The contents of the buffer as an array of unsigned bytes. No
            byte-order conversion is applied when decoding.
        """
        return np.frombuffer(buf, dtype=np.uint8)

    def encode(self, buf):
        """
        Compress the data in the buffer using the NOCOMPRESS algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The input data serialized in big-endian byte order.
        """
        # Data bytes should be stored as big endian in files
        return _as_big_endian_array(buf).tobytes()





[docs]
class Gzip1(Codec):
    """
    The FITS GZIP 1 compression and decompression algorithm.

    The Gzip algorithm is used in the free GNU software compression utility of
    the same name. It was created by J. L. Gailly and M. Adler, based on the
    DEFLATE algorithm (Deutsch 1996), which is a combination of LZ77 (Ziv &
    Lempel 1977) and Huffman coding.
    """

    codec_id = "FITS_GZIP1"

    def decode(self, buf):
        """
        Decompress buffer using the GZIP_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The decompressed buffer, as an array of unsigned bytes.
        """
        # In principle we should be able to not have .tobytes() here and avoid
        # the copy but this does not work correctly in Python 3.11.
        cbytes = np.frombuffer(buf, dtype=np.uint8).tobytes()
        dbytes = gzip_decompress(cbytes)
        return np.frombuffer(dbytes, dtype=np.uint8)

    def encode(self, buf):
        """
        Compress the data in the buffer using the GZIP_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The compressed bytes.
        """
        # Data bytes should be stored as big endian in files
        # In principle we should be able to not have .tobytes() here and avoid
        # the copy but this does not work correctly in Python 3.11.
        dbytes = _as_big_endian_array(buf).tobytes()
        return gzip_compress(dbytes)





[docs]
class Gzip2(Codec):
    """
    The FITS GZIP2 compression and decompression algorithm.

    The gzip2 algorithm is a variation on 'GZIP 1'. In this case the bytes in
    the array of data values are shuffled so that they are arranged in order of
    decreasing significance before being compressed.

    For example, a five-element contiguous array of two-byte (16-bit) integer
    values, with an original big-endian byte order of:

    .. math::
        A1 A2 B1 B2 C1 C2 D1 D2 E1 E2

    will have the following byte order after shuffling:

    .. math::
        A1 B1 C1 D1 E1 A2 B2 C2 D2 E2,

    where A1, B1, C1, D1, and E1 are the most-significant bytes from
    each of the integer values.

    Byte shuffling shall only be performed for integer or floating-point
    numeric data types; logical, bit, and character types must not be shuffled.

    Parameters
    ----------
    itemsize
        The number of bytes per value (e.g. 2 for a 16-bit integer). This is
        used to unshuffle the bytes when decoding; when encoding, the item
        size is taken from the dtype of the data being compressed.

    """

    codec_id = "FITS_GZIP2"

    def __init__(self, *, itemsize: int):
        super().__init__()
        self.itemsize = itemsize

    def decode(self, buf):
        """
        Decompress buffer using the GZIP_2 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The decompressed and unshuffled buffer, as an array of unsigned
            bytes.
        """
        cbytes = np.frombuffer(buf, dtype=np.uint8).tobytes()
        # Decompress first: the result is still in shuffled order, i.e. all
        # first bytes of every value, then all second bytes, and so on.
        shuffled_buffer = gzip_decompress(cbytes)
        array = np.frombuffer(shuffled_buffer, dtype=np.uint8)
        # Unshuffle: one row per byte position, transposed so that the bytes
        # belonging to each value are contiguous again.
        return array.reshape((self.itemsize, -1)).T.ravel()

    def encode(self, buf):
        """
        Compress the data in the buffer using the GZIP_2 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The compressed bytes.
        """
        # Data bytes should be stored as big endian in files
        array = _as_big_endian_array(buf).ravel()
        # Shuffle the bytes: one row per value, transposed so that bytes of
        # equal significance from all values are stored together.
        itemsize = array.dtype.itemsize
        array = array.view(np.uint8)
        shuffled_buffer = array.reshape((-1, itemsize)).T.ravel().tobytes()
        return gzip_compress(shuffled_buffer)





[docs]
class Rice1(Codec):
    """
    The FITS RICE1 compression and decompression algorithm.

    The Rice algorithm [1]_ is simple and very fast. It requires only enough
    memory to hold a single block of 16 or 32 pixels at a time. It codes the
    pixels in small blocks and so is able to adapt very quickly to changes in
    the input image statistics (e.g., Rice has no problem handling cosmic rays,
    bright stars, saturated pixels, etc.).

    Parameters
    ----------
    blocksize
        The blocksize to use, each tile is coded into blocks a number of pixels
        wide. The default value in FITS headers is 32 pixels per block.

    bytepix
        The number of 8-bit bytes in each original integer pixel value.

    tilesize
        The size of the tile. This is forwarded to the decompression routine
        and is not used when encoding.

    References
    ----------
    .. [1] Rice, R. F., Yeh, P.-S., and Miller, W. H. 1993, in Proc. of the 9th
           AIAA Computing in Aerospace Conf., AIAA-93-4541-CP, American Institute of
           Aeronautics and Astronautics [https://doi.org/10.2514/6.1993-4541]
    """

    codec_id = "FITS_RICE1"

    def __init__(self, *, blocksize: int, bytepix: int, tilesize: int):
        super().__init__()
        self.blocksize = blocksize
        self.bytepix = bytepix
        self.tilesize = tilesize

    def decode(self, buf):
        """
        Decompress buffer using the RICE_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The decompressed buffer, as signed integers that are ``bytepix``
            bytes wide.
        """
        cbytes = np.frombuffer(_as_native_endian_array(buf), dtype=np.uint8).tobytes()
        dbytes = decompress_rice_1_c(
            cbytes, self.blocksize, self.bytepix, self.tilesize
        )
        return np.frombuffer(dbytes, dtype=f"i{self.bytepix}")

    def encode(self, buf):
        """
        Compress the data in the buffer using the RICE_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The compressed bytes.
        """
        # We convert the data to native endian because it is passed to the
        # C compression code which will interpret it as being native endian.
        dbytes = (
            _as_native_endian_array(buf)
            .astype(f"i{self.bytepix}", copy=False)
            .tobytes()
        )
        return compress_rice_1_c(dbytes, self.blocksize, self.bytepix)





[docs]
class PLIO1(Codec):
    """
    The FITS PLIO1 compression and decompression algorithm.

    The IRAF PLIO (pixel list) algorithm was developed to store integer-valued
    image masks in a compressed form. Such masks often have large regions of
    constant value hence are highly compressible. The compression algorithm
    used is based on run-length encoding, with the ability to dynamically
    follow level changes in the image, allowing a 16-bit encoding to be used
    regardless of the image depth.

    Parameters
    ----------
    tilesize
        The size of the tile. This is forwarded to both the compression and
        the decompression routines.
    """

    codec_id = "FITS_PLIO1"

    def __init__(self, *, tilesize: int):
        super().__init__()
        self.tilesize = tilesize

    def decode(self, buf):
        """
        Decompress buffer using the PLIO_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The decompressed buffer, as 4-byte signed integers.
        """
        cbytes = np.frombuffer(_as_native_endian_array(buf), dtype=np.uint8).tobytes()
        dbytes = decompress_plio_1_c(cbytes, self.tilesize)
        return np.frombuffer(dbytes, dtype="i4")

    def encode(self, buf):
        """
        Compress the data in the buffer using the PLIO_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The compressed bytes.
        """
        # We convert the data to native endian because it is passed to the
        # C compression code which will interpret it as being native endian.
        dbytes = _as_native_endian_array(buf).astype("i4", copy=False).tobytes()
        return compress_plio_1_c(dbytes, self.tilesize)





[docs]
class HCompress1(Codec):
    """
    The FITS HCompress compression and decompression algorithm.

    Hcompress is the image compression package written by Richard L. White
    for use at the Space Telescope Science Institute. Hcompress was used to
    compress the STScI Digitized Sky Survey and has also been used to compress
    the preview images in the Hubble Data Archive.

    The technique gives very good compression for astronomical images and is
    relatively fast. The calculations are carried out using integer arithmetic
    and are entirely reversible. Consequently, the program can be used for
    either lossy or lossless compression, with no special approach needed for
    the lossless case.

    Parameters
    ----------
    scale
        The integer scale parameter determines the amount of compression. Scale
        = 0 or 1 leads to lossless compression, i.e. the decompressed image has
        exactly the same pixel values as the original image. If the scale
        factor is greater than 1 then the compression is lossy: the
        decompressed image will not be exactly the same as the original.

    smooth
        At high compression factors the decompressed image begins to appear
        blocky because of the way information is discarded. This blockiness
        is greatly reduced, producing more pleasing images, if the image
        is smoothed slightly during decompression. This option is only used
        when decoding.

    bytepix
        The number of bytes in each integer pixel value. The data are cast to
        a signed integer of this width before being compressed.

    nx
        The size of the tile along its first axis (``shape[0]``).

    ny
        The size of the tile along its second axis (``shape[1]``).

    References
    ----------
    .. [1] White, R. L. 1992, in Proceedings of the NASA Space and Earth Science
           Data Compression Workshop, ed. J. C. Tilton, Snowbird, UT;
           https://archive.org/details/nasa_techdoc_19930016742
    """

    codec_id = "FITS_HCOMPRESS1"

    def __init__(self, *, scale: int, smooth: bool, bytepix: int, nx: int, ny: int):
        super().__init__()
        self.scale = scale
        self.smooth = smooth
        self.bytepix = bytepix
        # NOTE: we should probably make this less confusing, but nx is shape[0] and ny is shape[1]
        self.nx = nx
        self.ny = ny

    def decode(self, buf):
        """
        Decompress buffer using the HCOMPRESS_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to decompress.

        Returns
        -------
        buf : np.ndarray
            The decompressed buffer, as 4-byte signed integers regardless of
            ``bytepix``.
        """
        cbytes = np.frombuffer(_as_native_endian_array(buf), dtype=np.uint8).tobytes()
        dbytes = decompress_hcompress_1_c(
            cbytes, self.nx, self.ny, self.scale, self.smooth, self.bytepix
        )
        # fits_hdecompress* always returns 4 byte integers irrespective of bytepix
        return np.frombuffer(dbytes, dtype="i4")

    def encode(self, buf):
        """
        Compress the data in the buffer using the HCOMPRESS_1 algorithm.

        Parameters
        ----------
        buf : bytes or array_like
            The buffer to compress.

        Returns
        -------
        bytes
            The compressed bytes.
        """
        # We convert the data to native endian because it is passed to the
        # C compression code which will interpret it as being native endian.
        dbytes = (
            _as_native_endian_array(buf)
            .astype(f"i{self.bytepix}", copy=False)
            .tobytes()
        )
        return compress_hcompress_1_c(
            dbytes, self.nx, self.ny, self.scale, self.bytepix
        )