Skip to content

Commit db5ac3f

Browse files
committed
ENH: Add data directory to manage the input/output
This adds the ability to set the data directory used for reading and writing data files to an explicit location of the user's choosing, through either a command line option or an environment variable.
1 parent f2560cf commit db5ac3f

16 files changed

Lines changed: 237 additions & 101 deletions

File tree

docs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ help:
1414

1515
clean:
1616
-rm -rf $(BUILDDIR)/*
17-
-rm -rf source/reference/generated
17+
-rm -rf source/code-documentation/generated
1818

1919
.PHONY: help Makefile
2020

docs/source/code-documentation/index.rst

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,31 @@ Instruments
2222
hi
2323
swapi
2424

25-
2625
Utility functions can be found in modules within the top package level.
2726

27+
Processing
28+
----------
29+
30+
To process an instrument, a command line utility is installed with the
31+
package. The command line utility is called ``imap_cli`` and
32+
takes the instrument and level as arguments. For example, to process
33+
the CODICE instrument at level L1A, the command would be:
34+
35+
.. code:: text
36+
37+
imap_cli --instrument codice --level l1a
38+
39+
This will write output files to the default location, which is
40+
an "imap-data" folder in the current working directory. To change the data
41+
directory, use the ``--data-dir`` option, or the environment
42+
variable ``IMAP_DATA_DIR``. For example, to use a temporary directory:
43+
44+
.. code:: text
45+
46+
imap_cli --instrument codice --level l1a --data-dir /tmp/imap-data
47+
# or equivalently with an environment variable
48+
IMAP_DATA_DIR=/tmp/imap-data imap_cli --instrument codice --level l1a
49+
2850
Tools
2951
-----
3052

imap_processing/__init__.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,25 @@
1313
# packet definitions directory path.
1414
#
1515
# This directory is used by the imap_processing package to find the packet definitions.
16+
import os
1617
from pathlib import Path
1718

19+
# NOTE: Use a config dictionary so that it is a mutable global object.
20+
# Otherwise, modules that had already imported the value would not see
21+
# later updates (for example, if they had imported a plain string).
22+
config = {"DATA_DIR": Path(os.getenv("IMAP_DATA_DIR") or Path.cwd() / "imap-data")}
23+
"""imap_processing configuration dictionary.
24+
25+
DATA_DIR : This is where the file data is stored and organized by instrument and level.
26+
The default location is "imap-data" in the current working directory, but can be
27+
set on the command line using the --data-dir option, or through
28+
the environment variable IMAP_DATA_DIR.
29+
"""
30+
1831
# Eg. imap_module_directory = /usr/local/lib/python3.11/site-packages/imap_processing
1932
imap_module_directory = Path(__file__).parent
2033

21-
instruments = [
34+
INSTRUMENTS = [
2235
"codice",
2336
"glows",
2437
"hi",
@@ -31,7 +44,7 @@
3144
"ultra",
3245
]
3346

34-
processing_levels = {
47+
PROCESSING_LEVELS = {
3548
"codice": ["l0", "l1a", "l1b", "l2"],
3649
"glows": ["l0", "l1a", "l1b", "l2"],
3750
"hi": ["l0", "l1a", "l1b", "l1c", "l2"],

imap_processing/cdf/utils.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
"""Various utility functions to support creation of CDF files."""
22

3-
import os
3+
import logging
4+
from pathlib import Path
5+
from typing import Optional
46

57
import numpy as np
68
import xarray as xr
79
from cdflib.xarray import xarray_to_cdf
810

11+
import imap_processing
12+
913

1014
def calc_start_time(shcoarse_time: int):
1115
"""Calculate the datetime64 from the CCSDS secondary header information.
@@ -36,7 +40,9 @@ def calc_start_time(shcoarse_time: int):
3640

3741

3842
def write_cdf(
39-
data: xr.Dataset, description: str = "", mode: str = "", directory: str = ""
43+
data: xr.Dataset,
44+
description: str = "",
45+
directory: Optional[Path] = None,
4046
):
4147
"""Write the contents of "data" to a CDF file using cdflib.xarray_to_cdf.
4248
@@ -49,16 +55,19 @@ def write_cdf(
4955
5056
Parameters
5157
----------
52-
data (xarray.Dataset): The dataset object to convert to a CDF
53-
description (str): The description to insert into the file name after the
58+
data : xarray.Dataset
59+
The dataset object to convert to a CDF
60+
description : str, optional
61+
The description to insert into the file name after the
5462
orbit, before the SPICE field. No underscores allowed.
55-
mode (str): Instrument mode
56-
directory (str): The directory to write the file to
63+
directory : pathlib.Path
64+
The directory to write the file to. The default is obtained
65+
from the global imap_processing.config["DATA_DIR"].
5766
5867
Returns
5968
-------
60-
str
61-
The name of the file created
69+
pathlib.Path
70+
Path to the file created
6271
"""
6372
# Determine the start date of the data in the file,
6473
# based on the time of the first dust impact
@@ -81,24 +90,37 @@ def write_cdf(
8190
if (description.startswith("_") or not description)
8291
else f"_{description}"
8392
)
84-
mode = mode if (mode.startswith("_") or not mode) else f"_{mode}"
8593

8694
# Determine the file name based on the attributes in the xarray
8795
# Set file name based on this convention:
88-
# imap_<instrument>_<datalevel>_<mode>_<descriptor>_<startdate>_
96+
# imap_<instrument>_<datalevel>_<descriptor>_<startdate>_
8997
# <version>.cdf
9098
# data.attrs["Logical_source"] has the mission, instrument, and level
9199
# like this:
92100
# imap_idex_l1
93101
filename = (
94102
data.attrs["Logical_source"]
95-
+ mode
96103
+ description
97104
+ "_"
98105
+ date_string
99106
+ f"_v{data.attrs['Data_version']}.cdf"
100107
)
101-
filename_and_path = os.path.join(directory, filename)
108+
109+
if directory is None:
110+
# Storage directory
111+
# mission/instrument/data_level/year/month/filename
112+
# /<directory | DATA_DIR>/<instrument>/<data_level>/<year>/<month>
113+
_, instrument, data_level = data.attrs["Logical_source"].split("_")
114+
directory = imap_processing.config["DATA_DIR"] / instrument / data_level
115+
directory /= date_string[:4]
116+
directory /= date_string[4:6]
117+
filename_and_path = Path(directory)
118+
if not filename_and_path.exists():
119+
logging.info(
120+
"The directory does not exist, creating directory %s", filename_and_path
121+
)
122+
filename_and_path.mkdir(parents=True)
123+
filename_and_path /= filename
102124

103125
# Insert the final attribute:
104126
# The Logical_file_id is always the name of the file without the extension

imap_processing/cli.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
import argparse
1414
import sys
1515
from abc import ABC, abstractmethod
16+
from pathlib import Path
1617

17-
from imap_processing import instruments, processing_levels
18+
import imap_processing
1819

1920

2021
def _parse_args():
@@ -28,15 +29,25 @@ def _parse_args():
2829
description = (
2930
"This command line program invokes the processing pipeline "
3031
"for a specific instrument and data level. Example usage: "
31-
'"python run_processing swe l1a".'
32+
'"python run_processing --instrument swe --level l1a".'
33+
)
34+
data_dir_help = (
35+
"Directory to use for reading and writing IMAP data. "
36+
"The default is an 'imap-data/' folder in the "
37+
"current working directory. This can also be "
38+
"set using the IMAP_DATA_DIR environment variable."
39+
)
40+
instrument_help = (
41+
"The instrument to process. Acceptable values are: "
42+
f"{imap_processing.INSTRUMENTS}"
3243
)
33-
34-
instrument_help = f"The instrument to process. Acceptable values are: {instruments}"
3544
level_help = (
36-
f"The data level to process. Acceptable values are: {processing_levels}"
45+
"The data level to process. Acceptable values are: "
46+
f"{imap_processing.PROCESSING_LEVELS}"
3747
)
3848

3949
parser = argparse.ArgumentParser(prog="imap_cli", description=description)
50+
parser.add_argument("--data-dir", type=str, required=False, help=data_dir_help)
4051
parser.add_argument("--instrument", type=str, required=True, help=instrument_help)
4152
parser.add_argument("--level", type=str, required=True, help=level_help)
4253
args = parser.parse_args()
@@ -52,15 +63,23 @@ def _validate_args(args):
5263
args : argparse.Namespace
5364
An object containing the parsed arguments and their values
5465
"""
55-
if args.instrument not in instruments:
66+
if args.instrument not in imap_processing.INSTRUMENTS:
5667
raise ValueError(
57-
f"{args.instrument} is not in the supported instrument list: {instruments}"
68+
f"{args.instrument} is not in the supported instrument list: "
69+
f"{imap_processing.INSTRUMENTS}"
5870
)
59-
if args.level not in processing_levels[args.instrument]:
71+
if args.level not in imap_processing.PROCESSING_LEVELS[args.instrument]:
6072
raise ValueError(
6173
f"{args.level} is not a supported data level for the {args.instrument}"
62-
f" instrument, valid levels are: {processing_levels[args.instrument]}"
74+
" instrument, valid levels are: "
75+
f"{imap_processing.PROCESSING_LEVELS[args.instrument]}"
6376
)
77+
if args.data_dir:
78+
data_path = Path(args.data_dir)
79+
if not data_path.exists():
80+
raise ValueError(f"Data directory {args.data_dir} does not exist")
81+
# Set the data directory to the user-supplied value
82+
imap_processing.config["DATA_DIR"] = data_path
6483

6584

6685
class ProcessInstrument(ABC):
@@ -162,7 +181,13 @@ def process(self):
162181

163182

164183
def main():
165-
"""Create CLI entrypoint."""
184+
"""Run the processing for a specific instrument & data level.
185+
186+
Set up the command line arguments, parse them, and then invoke the
187+
appropriate instrument processing function.
188+
"""
189+
# NOTE: This is to allow the cli script to be installed and reference
190+
# this function for an entrypoint.
166191
args = _parse_args()
167192

168193
_validate_args(args)

imap_processing/codice/codice_l1a.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,13 @@
2121
from imap_processing.utils import group_by_apid, sort_by_time
2222

2323

24-
def codice_l1a(
25-
packets: list[space_packet_parser.parser.Packet], cdf_directory: str
26-
) -> str:
24+
def codice_l1a(packets: list[space_packet_parser.parser.Packet]) -> str:
2725
"""Process CoDICE l0 data to create l1a data products.
2826
2927
Parameters
3028
----------
3129
packets : list[space_packet_parser.parser.Packet]
3230
Decom data list that contains all APIDs
33-
cdf_directory : str
34-
The directory in which to write the output CDF file.
3531
3632
Returns
3733
-------
@@ -51,9 +47,7 @@ def codice_l1a(
5147
# Write data to CDF
5248
cdf_filename = write_cdf(
5349
data,
54-
mode="",
5550
description="hk",
56-
directory=cdf_directory,
5751
)
5852

5953
return cdf_filename

imap_processing/swe/l1a/swe_l1a.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from imap_processing.utils import group_by_apid, sort_by_time
1313

1414

15-
def swe_l1a(packets, cdf_filepath):
15+
def swe_l1a(packets):
1616
"""Process SWE l0 data into l1a data.
1717
1818
Receive all L0 data file. Based on appId, it
@@ -23,13 +23,11 @@ def swe_l1a(packets, cdf_filepath):
2323
----------
2424
packets: list
2525
Decom data list that contains all appIds
26-
cdf_filepath: str
27-
Folder path of where to write CDF file
2826
2927
Returns
3028
-------
31-
str
32-
Path name of where CDF file was created.
29+
pathlib.Path
30+
Path to where the CDF file was created.
3331
This is used to upload file from local to s3.
3432
TODO: test this later.
3533
"""
@@ -51,9 +49,8 @@ def swe_l1a(packets, cdf_filepath):
5149
data = create_dataset(packets=sorted_packets)
5250

5351
# write data to CDF
52+
mode = f"{data['APP_MODE'].data[0]}-" if apid == SWEAPID.SWE_APP_HK else ""
5453
return write_cdf(
5554
data,
56-
mode=f"{data['APP_MODE'].data[0]}" if apid == SWEAPID.SWE_APP_HK else "",
57-
description=filename_descriptors.get(apid),
58-
directory=cdf_filepath,
55+
description=f"{mode}{filename_descriptors.get(apid)}",
5956
)

imap_processing/swe/l1b/swe_l1b.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,17 @@
1010
from imap_processing.utils import convert_raw_to_eu
1111

1212

13-
def swe_l1b(l1a_dataset: xr.Dataset, cdf_filepath: str):
13+
def swe_l1b(l1a_dataset: xr.Dataset):
1414
"""Process data to L1B.
1515
1616
Parameters
1717
----------
1818
l1a_dataset : xarray.Dataset
1919
l1a data input
20-
cdf_filepath: str
21-
Folder path of where to write CDF file
2220
2321
Returns
2422
-------
25-
str
23+
pathlib.Path
2624
Path to the L1B file.
2725
2826
Raises
@@ -50,10 +48,8 @@ def swe_l1b(l1a_dataset: xr.Dataset, cdf_filepath: str):
5048
data = eu_data
5149
# Update global attributes to l1b global attributes
5250
data.attrs.update(swe_cdf_attrs.swe_l1b_global_attrs.output())
53-
51+
mode = f"{data['APP_MODE'].data[0]}-" if apid == SWEAPID.SWE_APP_HK else ""
5452
return write_cdf(
5553
data,
56-
mode=f"{data['APP_MODE'].data[0]}" if apid == SWEAPID.SWE_APP_HK else "",
57-
description=filename_descriptors.get(apid),
58-
directory=cdf_filepath,
54+
description=f"{mode}{filename_descriptors.get(apid)}",
5955
)

0 commit comments

Comments
 (0)