imap_processing/imap_processing/tests/test_utils.py at dev · ashokcoding/imap_processing

462 lines (400 loc) · 15.6 KB
"""Tests coverage for imap_processing/utils.py"""
from unittest import mock
import numpy as np
import pandas as pd
import pytest
import xarray as xr
from imap_processing import imap_module_directory, utils
from imap_processing.spice.time import str_yyyymmdd_to_ttj2000ns
from imap_processing.ultra.utils.ultra_l1_utils import extract_data_dict
from imap_processing.utils import check_epochs_within_day_offsets
def test_convert_raw_to_eu(tmp_path):
    """Test coverage for utils.convert_raw_to_eu().

    Writes a conversion table CSV (with a leading comment line) holding
    unsegmented, segmented and invalid ``convertAs`` rows, then checks the
    converted values against a manual polynomial evaluation.
    """
    # Generate a csv for testing
    # Include segmented and unsegmented polyvals
    dn_cutoff = 2
    test_df = pd.DataFrame(
        data={
            "packetName": ["PACKET_0", "PACKET_0", "PACKET_2", "PACKET_2", "PACKET_1"],
            "mnemonic": ["FIELD_0", "FIELD_1", "FIELD_2", "FIELD_2", "FIELD_0"],
            "convertAs": [
                "UNSEGMENTED_POLY",
                "UNSEGMENTED_POLY",
                "SEGMENTED_POLY",
                "SEGMENTED_POLY",
                "FOO_METHOD",
            ],
            "dn_range_start": [0, 0, 1, dn_cutoff + 1, 0],
            "dn_range_stop": [0, 0, dn_cutoff, 4, 0],
            "c0": [0, 1, 3, 4, 2],
            "c1": [0, 1, 3, 4, 2],
            "c2": [0, 0, 3, 4, 2],
            "c3": [0, 0, 3, 4, 2],
            "c5": [0, 1, 3, 4, 2],
            "c6": [0, 0, 3, 4, 2],
            "c7": [0, 0, 3, 4, 2],
            "unit": ["a", "b", "d", "d", "c"],
        }
    )
    test_csv = tmp_path / "test_convert_table.csv"
    with open(test_csv, "w") as f:
        # The leading comment line exercises the comment="#" argument below
        f.write("# Comment on first line of file\n")
        test_df.to_csv(f, index=False)

    # Generate a fake dataset for testing
    n_packets = 3
    field_0 = np.arange(n_packets)
    field_1 = np.arange(n_packets) + 10
    field_2 = np.arange(n_packets) + 1
    time = np.arange(n_packets) + 1000
    dn_dataset = xr.Dataset(
        data_vars=dict(
            FIELD_0=(["time"], field_0),
            FIELD_1=(["time"], field_1),
            FIELD_2=(["time"], field_2),
        ),
        coords=dict(
            time=time,
        ),
    )

    eu_dataset = utils.convert_raw_to_eu(
        dn_dataset.copy(), test_csv.absolute(), "PACKET_0", comment="#"
    )

    # Check the converted values by manually doing the polynomial math
    # FIELD_0 of PACKET_0 has all-zero coefficients, so every value maps to 0
    np.testing.assert_array_equal(eu_dataset["FIELD_0"].data, np.zeros(n_packets))
    assert eu_dataset["FIELD_0"].attrs["UNITS"] == test_df["unit"].iloc[0]

    field_1_coeffs = (
        test_df[test_df["mnemonic"] == "FIELD_1"].filter(regex=r"c\d").values[0]
    )
    field_1_compare = np.zeros(n_packets)
    for p, coeff in enumerate(field_1_coeffs):
        field_1_compare += coeff * np.power(field_1, p)
    np.testing.assert_array_equal(eu_dataset["FIELD_1"].data, field_1_compare)

    eu_dataset = utils.convert_raw_to_eu(
        dn_dataset.copy(), test_csv.absolute(), "PACKET_2", comment="#"
    )

    # Test the segmented polynomials
    field_2_coeffs = (
        test_df[test_df["mnemonic"] == "FIELD_2"].filter(regex=r"c\d").values
    )
    field_2_compare_seg_1 = np.zeros(len(field_2[field_2 <= dn_cutoff]))
    field_2_compare_seg_2 = np.zeros(len(field_2[field_2 > dn_cutoff]))
    for p, coeff in enumerate(field_2_coeffs[0]):
        field_2_compare_seg_1 += coeff * np.power(field_2[field_2 <= dn_cutoff], p)
    for p, coeff in enumerate(field_2_coeffs[1]):
        field_2_compare_seg_2 += coeff * np.power(field_2[field_2 > dn_cutoff], p)
    # field_2 is increasing, so segment 1 values precede segment 2 values
    field_2_compare = np.concatenate([field_2_compare_seg_1, field_2_compare_seg_2])
    np.testing.assert_array_equal(eu_dataset["FIELD_2"].data, field_2_compare)
    assert eu_dataset["FIELD_2"].attrs["UNITS"] == test_df["unit"].iloc[2]

    # Check that a ValueError is raised for unexpected conversion specified in
    # conversion table "convertAs" column
    with pytest.raises(
        ValueError,
        match="Column 'convertAs' must all be UNSEGMENTED_POLY or "
        "SEGMENTED_POLY for a packet name and mnemonic",
    ):
        utils.convert_raw_to_eu(
            dn_dataset.copy(), test_csv.absolute(), "PACKET_1", comment="#"
        )
def test_segmented_poly_out_of_range(tmp_path):
    """Test that a value error is thrown if a raw DN value is out of range"""
    # Generate a csv for testing
    test_df = pd.DataFrame(
        data={
            "packetName": ["PACKET_0", "PACKET_0"],
            "mnemonic": ["FIELD_0", "FIELD_0"],
            "convertAs": ["SEGMENTED_POLY", "SEGMENTED_POLY"],
            # Make ranges higher than DNs
            "dn_range_start": [100, 201],
            "dn_range_stop": [200, 400],
            "c0": [0, 1],
            "c1": [0, 1],
            "c2": [0, 1],
            "c3": [0, 1],
            "c5": [0, 1],
            "c6": [0, 1],
            "c7": [0, 1],
            "unit": ["a", "b"],
        }
    )
    test_csv = tmp_path / "test_convert_table.csv"
    with open(test_csv, "w") as f:
        f.write("# Comment on first line of file\n")
        test_df.to_csv(f, index=False)

    # Generate a fake dataset for testing
    # DN values are 0..2, all below the lowest dn_range_start of 100
    n_packets = 3
    field_0 = np.arange(n_packets)
    time = np.arange(n_packets) + 1000
    dn_dataset = xr.Dataset(
        data_vars=dict(
            FIELD_0=(["time"], field_0),
        ),
        coords=dict(
            time=time,
        ),
    )

    # Check that a ValueError is raised for DNs not in any ranges
    with pytest.raises(
        ValueError, match="Raw DN values found outside of the expected range"
    ):
        utils.convert_raw_to_eu(
            dn_dataset.copy(), test_csv.absolute(), "PACKET_0", comment="#"
        )
def test_unsegmented_poly_multiple_rows(tmp_path):
    """Test that a value error is thrown if there are multiple rows with the same
    mnemonic and packet name and convertAs is UNSEGMENTED_POLY"""
    # Generate a csv for testing
    # Both rows share packetName and mnemonic, which is the invalid condition
    test_df = pd.DataFrame(
        data={
            "packetName": ["PACKET_0", "PACKET_0"],
            "mnemonic": ["FIELD_0", "FIELD_0"],
            "convertAs": ["UNSEGMENTED_POLY", "UNSEGMENTED_POLY"],
            "c0": [0, 1],
            "c1": [0, 1],
            "c2": [0, 1],
            "c3": [0, 1],
            "c5": [0, 1],
            "c6": [0, 1],
            "c7": [0, 1],
            "unit": ["a", "a"],
        }
    )
    test_csv = tmp_path / "test_convert_table.csv"
    with open(test_csv, "w") as f:
        f.write("# Comment on first line of file\n")
        test_df.to_csv(f, index=False)

    # Generate a fake dataset for testing
    field_0 = np.arange(3)
    dn_dataset = xr.Dataset(
        data_vars=dict(
            FIELD_0=(["time"], field_0),
        )
    )

    # Check that a ValueError is raised when an unsegmented conversion has
    # more than one row for the same mnemonic and packet name
    with pytest.raises(
        ValueError,
        match="For unsegmented polynomial conversions, there should "
        "only be one row per mnemonic and packet name",
    ):
        utils.convert_raw_to_eu(
            dn_dataset.copy(), test_csv.absolute(), "PACKET_0", comment="#"
        )
@pytest.mark.parametrize(
    "use_derived_value, expected_mode",
    [(True, np.array(["HVENG", "HVSCI"])), (False, np.array([2, 3]))],
)
def test_packet_file_to_datasets(use_derived_value, expected_mode):
    """
    Test that all datatypes aren't all int64 and that we get
    uint8/uint16 from header items as expected.

    Test that we get multiple apids in the output.

    The test is parametrized over ``use_derived_value``; the unique values of
    the ``mode`` variable are compared against ``expected_mode`` in each case.
    """
    test_file = "tests/swapi/l0_data/imap_swapi_l0_raw_20240924_v001.pkts"
    packet_files = imap_module_directory / test_file
    packet_definition = (
        imap_module_directory / "swapi/packet_definitions/swapi_packet_definition.xml"
    )
    datasets_by_apid = utils.packet_file_to_datasets(
        packet_files, packet_definition, use_derived_value=use_derived_value
    )
    # 2 apids in the SWAPI test data that we decommutate
    # (2 others are not included in the XTCE definition, but are in the raw packet file)
    assert len(datasets_by_apid) == 2
    data = datasets_by_apid[1188]
    assert data["sec_hdr_flg"].dtype == np.uint8
    assert data["pkt_apid"].dtype == np.uint16
    np.testing.assert_array_equal(np.unique(data["mode"].data), expected_mode)
def test_packet_file_to_datasets_duplicates(tmpdir, caplog):
    """
    Test that duplicate packets are dropped.

    A packet file whose contents are written twice must produce the same
    number of epochs for APID 1188 as the original file, and a message about
    dropping duplicate packets must be logged.
    """
    test_file = "tests/swapi/l0_data/imap_swapi_l0_raw_20240924_v001.pkts"
    packet_file = imap_module_directory / test_file

    # Write the file out twice to double the number of binary packets in
    # a new file for testing
    two_files = tmpdir / "two_files.pkts"
    with open(packet_file, "rb") as original_file, open(two_files, "wb") as f:
        data = original_file.read()
        f.write(data)
        f.write(data)

    packet_definition = (
        imap_module_directory / "swapi/packet_definitions/swapi_packet_definition.xml"
    )
    ds_two_files = utils.packet_file_to_datasets(two_files, packet_definition)
    ds_one_file = utils.packet_file_to_datasets(packet_file, packet_definition)

    assert len(ds_two_files[1188]["epoch"]) == len(ds_one_file[1188]["epoch"])
    assert len(ds_two_files[1188]["epoch"]) == 153
    # The first captured record comes from processing the doubled file
    assert "Dropping duplicate packets" in caplog.records[0].message
def test_packet_file_to_datasets_flat_definition():
    """Test that mismatched packet fields raise a ValueError.

    Decoding the IDEX raw packet file with the IDEX science packet definition
    is expected to fail with "Packet fields do not match".
    """
    test_file = "tests/idex/test_data/imap_idex_l0_raw_20231218_v001.pkts"
    packet_files = imap_module_directory / test_file
    packet_definition = (
        imap_module_directory
        / "idex/packet_definitions/idex_science_packet_definition.xml"
    )
    with pytest.raises(ValueError, match="Packet fields do not match"):
        utils.packet_file_to_datasets(packet_files, packet_definition)
def test_combine_segmented_packets():
    """Test combine_segmented_packets function."""
    # unsegmented, first, middle, last, unsegmented
    sequence_flags = xr.DataArray(np.array([3, 1, 0, 2, 3]), dims=["epoch"])
    # NOTE(review): the three segment payloads below (and the combined value in
    # expected_ds) were reconstructed so that the data has one entry per
    # sequence flag; this assumes segments are concatenated in order.
    # Confirm against the upstream test.
    binary_data = xr.DataArray(
        np.array(
            [
                b"ABC",
                b"123",
                b"456",
                b"789",
                b"abc",
            ],
            dtype=object,
        ),
        dims=["epoch"],
    )
    ds = xr.Dataset(data_vars={"seq_flgs": sequence_flags, "packetdata": binary_data})

    combined_ds = utils.combine_segmented_packets(ds, "packetdata")

    # The first/middle/last group collapses to one entry that keeps the
    # "first" sequence flag (1)
    expected_ds = xr.Dataset(
        data_vars={
            "seq_flgs": xr.DataArray(np.array([3, 1, 3]), dims=["epoch"]),
            "packetdata": xr.DataArray(
                np.array(
                    [
                        b"ABC",
                        b"123456789",
                        b"abc",
                    ],
                    dtype=object,
                ),
                dims=["epoch"],
            ),
        }
    )
    xr.testing.assert_equal(combined_ds, expected_ds)
def test_combine_single_segmented_packets(caplog):
    """Test combine_segmented_packets function when there are missing segments."""
    # Create a dataset with the MIDDLE and LAST segments missing.
    # unsegmented, first, unsegmented
    sequence_flags = xr.DataArray(np.array([3, 1, 3]), dims=["epoch"])
    # NOTE(review): the payload of the lone "first" segment was reconstructed;
    # its value does not matter because that packet is dropped from the
    # expected output.
    binary_data = xr.DataArray(
        np.array(
            [
                b"ABC",
                b"123",
                b"abc",
            ],
            dtype=object,
        ),
        dims=["epoch"],
    )
    shcoarse = xr.DataArray(np.array([0, 1, 2]), dims=["epoch"])
    ds = xr.Dataset(
        data_vars={
            "seq_flgs": sequence_flags,
            "packetdata": binary_data,
            "shcoarse": shcoarse,
        }
    )

    combined_ds = utils.combine_segmented_packets(ds, "packetdata")

    # The combined dataset should only have the unsegmented packets
    # and a warning should be logged about the missing segments.
    expected_ds = xr.Dataset(
        data_vars={
            "seq_flgs": xr.DataArray(np.array([3, 3]), dims=["epoch"]),
            "packetdata": xr.DataArray(
                np.array(
                    [b"ABC", b"abc"],
                    dtype=object,
                ),
                dims=["epoch"],
            ),
            "shcoarse": xr.DataArray(np.array([0, 2]), dims=["epoch"]),
        }
    )
    xr.testing.assert_equal(combined_ds, expected_ds)
    # check that a warning was logged
    assert "Incorrect/incomplete sequence flags in group 2." in caplog.text
def test_check_source_sequence_counter(caplog):
    """Test _check_source_sequence_counter function."""
    # The counter skips 2 and 5, i.e. two gaps in an otherwise consecutive run
    data_vars = {
        "src_seq_ctr": (["epoch"], np.array([0, 1, 3, 4, 6])),
    }
    ds = xr.Dataset(data_vars=data_vars)

    utils._check_source_sequence_counter(ds, apid=1234)

    assert "Found [2] gap(s) in source sequence counter for APID 1234" in caplog.text
def test_extract_data_dict():
    """Test extract_data_dict function.

    The result is expected to contain an entry for every data variable and
    every coordinate of the input dataset.
    """
    data_vars = {
        "field_a": (["spin_number"], np.array([1, 2, 3])),
        "field_b": (["spin_number"], np.array([4, 5, 6])),
    }
    coords = {
        "spin_number": np.array([0, 1, 2]),
        "energy_bin_geometric_mean": np.array([10.0, 20.0, 30.0]),
        "epoch": np.array(
            ["2025-01-01", "2025-01-02", "2025-01-03"], dtype="datetime64[ns]"
        ),
    }
    ds = xr.Dataset(data_vars=data_vars, coords=coords)

    result = extract_data_dict(ds)

    assert set(result.keys()) == {
        "field_a",
        "field_b",
        "spin_number",
        "energy_bin_geometric_mean",
        "epoch",
    }
    np.testing.assert_array_equal(result["field_a"], np.array([1, 2, 3]))
    np.testing.assert_array_equal(result["spin_number"], np.array([0, 1, 2]))
def test_filter_day_boundary_data():
    """Test filter_day_boundary_data filters epochs outside the processing day."""
    start_date = "20250901"
    start = str_yyyymmdd_to_ttj2000ns(start_date)
    one_day_ns = np.int64(86_400 * 1_000_000_000)

    # Epochs: one before the day, three within, one after
    epoch_values = np.array(
        [
            start - 1,  # before day boundary
            start,  # exactly at start (included)
            start + one_day_ns // 2,  # midday (included)
            start + one_day_ns - 1,  # last ns of day (included)
            start + one_day_ns,  # exactly at next day start (excluded)
        ],
        dtype=np.int64,
    )
    ds = xr.Dataset(
        {"value": ("epoch", np.arange(len(epoch_values)))},
        coords={"epoch": epoch_values},
    )

    result = utils.filter_day_boundary_data(ds, start_date)

    # Only the three in-day epochs (indices 1..3) should survive
    assert result.sizes["epoch"] == 3
    np.testing.assert_array_equal(result["epoch"].values, epoch_values[1:4])
@pytest.mark.parametrize(
    "epoch_ns,raises",
    [
        # midday of expected day — passes
        (int(1.5 * 86400 * 1e9), False),
        # exactly at lower tolerance boundary (24h before day start) — passes
        (0, False),
        # 1 ns before lower bound — more than 24h outside, raises
        (-1, True),
        # 1 ns past upper bound — more than 24h outside, raises
        (int(3 * 86400 * 1e9 + 1), True),
    ],
)
def test_check_epochs_within_day(epoch_ns, raises):
    """check_epochs_within_day_offsets raises only when an epoch is more than
    24 hours outside the expected day."""
    # lower = expected_day - 1 day (J2000 ns = 0), upper = expected_day + 2 days
    lower_ns = 0
    upper_ns = int(3 * 86400 * 1e9)
    day = np.datetime64("2025-01-01", "D")
    ds = xr.Dataset({"epoch": xr.DataArray(np.array([epoch_ns], dtype=np.int64))})

    # Patch the date-to-nanoseconds conversion so the first two calls return
    # the fixed lower and upper bounds, in that order
    with mock.patch(
        "imap_processing.utils.str_yyyymmdd_to_ttj2000ns",
        side_effect=[lower_ns, upper_ns],
    ):
        if raises:
            with pytest.raises(ValueError, match="more than 24 hours outside"):
                check_epochs_within_day_offsets([ds], day)
        else:
            check_epochs_within_day_offsets([ds], day)
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

test_utils.py

Latest commit

History

test_utils.py

File metadata and controls