imap_processing/imap_processing/codice/codice_l2.py at patch-2 · mstrumik/imap_processing

History

1681 lines (1525 loc) · 64.7 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

"""

Perform CoDICE l2 processing.

This module processes CoDICE l1 files and creates L2 data products.

Notes

-----

from imap_processing.codice.codice_l2 import process_codice_l2

dataset = process_codice_l2(l1_filename)

"""

import datetime

import logging

from pathlib import Path

import numpy as np

import pandas as pd

import xarray as xr

from imap_data_access import ProcessingInputCollection, ScienceFilePath

from numpy.typing import NDArray

from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes

from imap_processing.cdf.utils import load_cdf

from imap_processing.codice.constants import (

GAIN_ID_TO_STR,

HALF_SPIN_FILLVAL,

HI_L2_ELEVATION_ANGLE,

HI_OMNI_VARIABLE_NAMES,

HI_SECTORED_VARIABLE_NAMES,

L2_HI_SECTORED_ANGLE,

LO_NSW_ANGULAR_VARIABLE_NAMES,

LO_NSW_SPECIES_VARIABLE_NAMES,

LO_POSITION_TO_ELEVATION_ANGLE,

LO_SW_ANGULAR_VARIABLE_NAMES,

LO_SW_PICKUP_ION_SPECIES_VARIABLE_NAMES,

LO_SW_SOLAR_WIND_SPECIES_VARIABLE_NAMES,

NSW_POSITIONS,

PUI_POSITIONS,

SOLAR_WIND_POSITIONS,

SSD_ID_TO_ELEVATION,

SSD_ID_TO_SPIN_ANGLE,

SW_POSITIONS,

)

from imap_processing.codice.utils import apply_replacements_to_attrs

logger = logging.getLogger(__name__)

def get_lo_de_energy_luts(
    dependencies: ProcessingInputCollection,
) -> tuple[NDArray, NDArray]:
    """
    Load the Lo direct-event energy lookup tables.

    Both tables are CSV files located through ``dependencies``. The first row
    of each file is skipped and the remaining rows are read without a header.

    Parameters
    ----------
    dependencies : ProcessingInputCollection
        The collection of processing input files.

    Returns
    -------
    energy_lut : np.ndarray
        An array of energy in keV for each energy table index.
    energy_bins_lut : np.ndarray
        An array of energy bins (second column of the energy bins file).
    """
    # Resolve both file paths up front, then read them in the same order
    table_path, bins_path = (
        dependencies.get_file_paths(descriptor=name)[0]
        for name in ("l2-lo-onboard-energy-table", "l2-lo-onboard-energy-bins")
    )

    energy_lut = pd.read_csv(table_path, header=None, skiprows=1).to_numpy()

    # Only column 1 of the bins file is kept
    all_bin_columns = pd.read_csv(bins_path, header=None, skiprows=1).to_numpy()
    energy_bins_lut = all_bin_columns[:, 1]

    return energy_lut, energy_bins_lut

def get_mpq_calc_energy_conversion_vals(
    dependencies: ProcessingInputCollection,
) -> np.ndarray:
    """
    Get the mass per charge (MPQ) esa step to energy keV conversion values.

    Parameters
    ----------
    dependencies : ProcessingInputCollection
        The collection of processing input files.

    Returns
    -------
    esa_kev : np.ndarray
        An array of energy in keV for each esa step.
    """
    # Take the first file registered under the MPQ calibration descriptor
    mpq_calc_lut_file = dependencies.get_file_paths(
        descriptor="l2-lo-onboard-mpq-cal"
    )[0]
    mpq_df = pd.read_csv(mpq_calc_lut_file, header=None)

    # The k factor is stored at row 0, column 10 of the calibration file
    k_factor = float(mpq_df.loc[0, 10])
    # Row 4, from column 4 onward, holds the per-esa-step values
    # (presumably ESA voltages -- confirm against the calibration file layout)
    esa_v = mpq_df.loc[4, 4:].to_numpy().astype(np.float64)

    # Calculate the energy in keV for each esa step
    esa_kev = esa_v * k_factor / 1000
    return esa_kev

def get_mpq_calc_tof_conversion_vals(
    dependencies: ProcessingInputCollection,
) -> np.ndarray:
    """
    Get the MPQ calculation tof to ns conversion lookup table values.

    The conversion is a quadratic in the TOF bit value::

        tof_ns = tof_bits**2 * ns_channel_sq + tof_bits * ns_channel + tof_offset

    Parameters
    ----------
    dependencies : ProcessingInputCollection
        The collection of processing input files.

    Returns
    -------
    tof_ns : np.ndarray
        Tof in ns for each TOF bit.
    """
    # Take the first file registered under the MPQ calibration descriptor
    mpq_calc_lut_file = dependencies.get_file_paths(
        descriptor="l2-lo-onboard-mpq-cal"
    )[0]
    mpq_df = pd.read_csv(mpq_calc_lut_file, header=None)

    # Polynomial coefficients live in column 1 of rows 2-4
    ns_channel_sq = float(mpq_df.loc[2, 1])
    ns_channel = float(mpq_df.loc[3, 1])
    tof_offset = float(mpq_df.loc[4, 1])

    # Get the TOF bit to ns lookup (column 0 from row 6 onward)
    tof_bits = mpq_df.loc[6:, 0].to_numpy().astype(np.int64)

    # Calculate the TOF in ns for each TOF bit
    tof_ns = tof_bits**2 * ns_channel_sq + tof_bits * ns_channel + tof_offset
    return tof_ns

def get_hi_de_luts(
    dependencies: ProcessingInputCollection | None,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Load the lookup tables used for hi direct-event processing.

    Parameters
    ----------
    dependencies : ProcessingInputCollection
        The collection of processing input files.

    Returns
    -------
    energy_table : np.ndarray
        2D array of energy lookup table with shape (ssd_energy, col).
    tof_table : np.ndarray
        2D array of tof lookup table with shape (tof_index, col).
    """
    energy_csv = dependencies.get_file_paths(descriptor="l2-hi-energy-table")[0]
    tof_csv = dependencies.get_file_paths(descriptor="l2-hi-tof-table")[0]

    def _load_without_index_column(csv_file) -> np.ndarray:
        # Skip the first row and drop the first column, which is an index
        frame = pd.read_csv(csv_file, header=None, skiprows=1)
        return frame.iloc[:, 1:].to_numpy()

    # Each row is a tof index; the columns are tof (ns) and E/n (MeV/n)
    tof_table = _load_without_index_column(tof_csv)

    # Each row is an ssd energy index; the columns map to a combination of
    # gain and ssd id
    energy_table = _load_without_index_column(energy_csv)

    return energy_table, tof_table

def get_geometric_factor_lut(
    dependencies: ProcessingInputCollection | None,
    path: Path | None = None,
) -> dict:
    """
    Get the geometric factor lookup table.

    Parameters
    ----------
    dependencies : ProcessingInputCollection or None
        The collection of processing input files. Only consulted when ``path``
        is not given.
    path : pathlib.Path, optional
        Optional path used for I-ALiRT. Takes precedence over ``dependencies``.

    Returns
    -------
    geometric_factor_lut : dict
        A dict with a full and reduced mode array with shape (esa_steps, position).

    Raises
    ------
    ValueError
        If both ``dependencies`` and ``path`` are None.
    """
    if path is not None:
        csv_path = path
    elif dependencies is not None:
        csv_path = Path(dependencies.get_file_paths(descriptor="l2-lo-gfactor")[0])
    else:
        # Fail with a clear message instead of an AttributeError on NoneType
        raise ValueError(
            "Either dependencies or path must be provided to locate the "
            "geometric factor lookup table."
        )

    geometric_factors = pd.read_csv(csv_path)

    # Sort by esa step. They should already be sorted, but just in case
    full = geometric_factors[geometric_factors["mode"] == "full"].sort_values(
        by="esa_step"
    )
    reduced = geometric_factors[geometric_factors["mode"] == "reduced"].sort_values(
        by="esa_step"
    )

    # Sort position columns numerically by suffix (position_2 before position_10)
    position_names_sorted = sorted(
        [col for col in full if col.startswith("position")],
        key=lambda x: int(x.split("_")[-1]),
    )

    return {
        "full": full[position_names_sorted].to_numpy(),
        "reduced": reduced[position_names_sorted].to_numpy(),
    }

def get_efficiency_lut(
    dependencies: ProcessingInputCollection | None,
    path: Path | None = None,
) -> pd.DataFrame:
    """
    Get the efficiency lookup table.

    Parameters
    ----------
    dependencies : ProcessingInputCollection or None
        The collection of processing input files. Only consulted when ``path``
        is not given.
    path : pathlib.Path, optional
        Optional path used for I-ALiRT. Takes precedence over ``dependencies``.

    Returns
    -------
    efficiency_lut : pandas.DataFrame
        Contains the efficiency lookup table. Columns are:
        species, product, esa_step, position_1, position_2, ..., position_24.

    Raises
    ------
    ValueError
        If both ``dependencies`` and ``path`` are None.
    """
    if path is not None:
        csv_path = path
    elif dependencies is not None:
        csv_path = Path(dependencies.get_file_paths(descriptor="l2-lo-efficiency")[0])
    else:
        # Fail with a clear message instead of an AttributeError on NoneType
        raise ValueError(
            "Either dependencies or path must be provided to locate the "
            "efficiency lookup table."
        )

    return pd.read_csv(csv_path)

def get_species_efficiency(species: str, efficiency: pd.DataFrame) -> xr.DataArray:
    """
    Extract the efficiency table rows for a single species.

    Parameters
    ----------
    species : str
        The species name.
    efficiency : pandas.DataFrame
        The efficiency lookup table.

    Returns
    -------
    efficiency : xarray.DataArray
        A 2D array of efficiencies with dims (esa_step, inst_az). Rows are
        ordered by ``esa_step`` and columns by the numeric suffix of the
        ``position_*`` column names.
    """
    # Keep only this species' rows, ordered by esa step
    rows = efficiency.loc[efficiency["species"] == species]
    rows = rows.sort_values(by="esa_step")

    # Order the position columns numerically (position_2 before position_10)
    position_columns = [name for name in rows if name.startswith("position")]
    position_columns.sort(key=lambda name: int(name.rsplit("_", 1)[-1]))

    # Shape: (esa_step, inst_az)
    values = rows[position_columns].to_numpy()
    return xr.DataArray(values, dims=("esa_step", "inst_az"))

def compute_geometric_factors(
    dataset: xr.Dataset, geometric_factor_lookup: dict, angular_product: bool = False
) -> xr.DataArray:
    """
    Calculate geometric factors needed for intensity calculations.

    Geometric factors are selected per epoch and esa step from either the
    "full" or the "reduced" lookup table by comparing ``half_spin_per_esa_step``
    to ``rgfo_half_spin`` in the provided dataset. Half-spin entries equal to
    ``HALF_SPIN_FILLVAL`` are never treated as reduced mode.

    The selection rule depends on the product and the processing date, which is
    parsed from the fifth underscore-separated field of the ``Logical_file_id``
    attribute (``%Y%m%d``):

    1. Angular products whose first ``packet_version`` value is greater than 1:
       reduced mode applies where the half spin is greater than
       ``rgfo_half_spin``, or where it equals ``rgfo_half_spin`` and either the
       spin sector (mod 12) is greater than ``rgfo_spin_sector`` (mod 12), or
       the spin sector matches and the esa step is greater than
       ``rgfo_esa_step``.
    2. Otherwise, for dates before 2025-11-24 or on/after 2026-01-29: reduced
       mode applies where the half spin is greater than ``rgfo_half_spin``.
    3. Otherwise (2025-11-24 up to, but excluding, 2026-01-29): the full
       geometric factor is always used.

    NOTE: Half spin values are associated with ESA steps which corresponds to the
    index of the energy_per_charge dimension that is between 0 and 127.

    NOTE: If packet_version = 2, the Lo L1B product now contains variables that
    indicate the esa step and spin sector during which the RGFO or NSO limits are
    triggered. The spin sector variable ranges from 0-11 and is the instrument
    reported spin sector. In the following algorithm, spin_angle refers to the
    L1B angular bin (0 - 23) which is despun and spin_sector refers to the
    non-despun spin sector reported from the instrument (0-11).

    Parameters
    ----------
    dataset : xarray.Dataset
        The L2 dataset containing rgfo_half_spin data variable.
    geometric_factor_lookup : dict
        A dict with a full and reduced mode array with shape (esa_steps, position).
    angular_product : bool
        Whether the product being processed is an angular product. If True, then
        the geometric factor calculation has additional steps to determine the
        exact rgfo boundary.

    Returns
    -------
    geometric_factors : xarray.DataArray
        Geometric factors with dims (epoch, esa_step, inst_az), or
        (epoch, esa_step, spin_sector, inst_az) when spin sector information
        was used to determine the mode.

    Raises
    ------
    ValueError
        If the dataset has no ``Logical_file_id`` attribute.
    """
    # Get half spin values per esa step from the dataset
    # Add a new dim for spin_sector. Shape: (epoch, esa_step, 1)
    half_spin_per_esa_step = dataset.half_spin_per_esa_step.values[:, :, np.newaxis]
    # Expand dimensions to compare each rgfo_half_spin value against
    # all half_spin_values and spin_sectors. Shape: (epoch, 1, 1)
    rgfo_half_spin = dataset.rgfo_half_spin.data[:, np.newaxis, np.newaxis]

    # The mode rule depends on the processing date
    start_date = dataset.attrs.get("Logical_file_id", None)
    if start_date is None:
        raise ValueError("Dataset is missing Logical_file_id attribute.")
    processing_date = datetime.datetime.strptime(start_date.split("_")[4], "%Y%m%d")
    date_switch = datetime.datetime(2025, 11, 24)
    fsw_switch_date = datetime.datetime(2026, 1, 29)

    # Only consider valid half spins
    valid_half_spin = half_spin_per_esa_step != HALF_SPIN_FILLVAL

    # TODO: Fix this calculation on days when the sci Lut changes. There may be
    # different packet versions in the same dataset.
    # Perform the comparison and calculate modes
    if angular_product and dataset.packet_version.data[0] > 1:
        # For angular products with packet version > 1, we have spin sector
        # information to determine the exact boundary of the RGFO mode.
        # Mod by 12 to convert rgfo_spin_sector to half spin sector range of 0-11
        # Shape: (epoch, 1, 1)
        rgfo_spin_sector = dataset.rgfo_spin_sector.data[:, np.newaxis, np.newaxis] % 12
        rgfo_esa_step = dataset.rgfo_esa_step.data[:, np.newaxis, np.newaxis]
        # Shape: (1, 1, spin_sector (24))
        spin_sector = dataset.spin_sector.data[np.newaxis, np.newaxis, :]
        # Shape: (1, esa_step (128), 1)
        esa_step = dataset.esa_step.data[np.newaxis, :, np.newaxis]

        at_boundary = half_spin_per_esa_step == rgfo_half_spin
        modes = (
            # Reduced mode (True) is applied where:
            # 1. Half spin is valid.
            valid_half_spin
            & (
                # 2. Half spin is greater than rgfo_half_spin.
                (half_spin_per_esa_step > rgfo_half_spin)
                | (
                    # 3. Where half_spin_per_esa_step equals rgfo_half_spin AND
                    at_boundary
                    & (
                        # a. The spin sector mod 12 is greater than rgfo_spin_sector
                        ((spin_sector % 12) > rgfo_spin_sector)
                        # b. OR the spin sector mod 12 equals rgfo_spin_sector AND
                        #    the esa step is greater than rgfo_esa_step
                        | (
                            ((spin_sector % 12) == rgfo_spin_sector)
                            & (esa_step > rgfo_esa_step)
                        )
                    )
                )
            )
        )
    elif (processing_date < date_switch) | (processing_date >= fsw_switch_date):
        # Modes will be true (reduced mode) anywhere half_spin > rgfo_half_spin
        # otherwise false (full mode)
        modes = valid_half_spin & (half_spin_per_esa_step > rgfo_half_spin)
    else:
        # Between date_switch and fsw_switch_date we do not apply reduced
        # geometric factors; always use the full geometric factor lookup.
        modes = np.zeros_like(half_spin_per_esa_step, dtype=bool)

    # If the last dimension of modes is 24, we have spin sector information and
    # need to apply the geometric factor lookup differently
    if modes.shape[-1] == 24:
        # Get the geometric factors based on the modes
        # expand the mode array to include a dimension for "inst_az" (also shape=24)
        modes = modes[:, :, :, np.newaxis]  # Shape (epoch, esa_step, 24, 1)
        gf = np.where(
            modes,  # Shape (epoch, esa_step, 24, 1)
            geometric_factor_lookup["reduced"][:, np.newaxis, :],  # (esa_step, 1, 24)
            geometric_factor_lookup["full"][:, np.newaxis, :],  # (esa_step, 1, 24)
        )  # Shape: (epoch, esa_step, spin_sector, inst_az)
        return xr.DataArray(gf, dims=("epoch", "esa_step", "spin_sector", "inst_az"))
    else:
        # Get the geometric factors based on the modes
        gf = np.where(
            modes,  # Shape (epoch, esa_step, 1)
            geometric_factor_lookup["reduced"],  # (esa_step, 24)
            geometric_factor_lookup["full"],  # (esa_step, 24)
        )  # Shape: (epoch, esa_step, inst_az)
        return xr.DataArray(gf, dims=("epoch", "esa_step", "inst_az"))

def calculate_intensity(
    dataset: xr.Dataset,
    species_list: list,
    geometric_factors: xr.DataArray,
    efficiency: pd.DataFrame,
    positions: list,
    average_across_positions: bool = False,
) -> xr.Dataset:
    """
    Calculate species or angular intensities.

    For every species the data (and its ``unc_<species>`` uncertainty, when
    present) is divided by::

        scalar * geometric_factor * efficiency * energy_per_charge

    where ``scalar`` is ``len(positions)`` when averaging across positions and
    1 otherwise. The dataset is modified in place and also returned.

    Parameters
    ----------
    dataset : xarray.Dataset
        The L2 dataset to process.
    species_list : list
        List of species variable names to calculate intensity.
    geometric_factors : xarray.DataArray
        The geometric factors array; must have an ``inst_az`` dimension.
    efficiency : pandas.DataFrame
        The efficiency lookup table.
    positions : list
        A list of position indices to select from the geometric factor and
        efficiency lookup tables.
    average_across_positions : bool
        Whether to average the efficiencies and geometric factors across the
        selected positions. Default is False.

    Returns
    -------
    xarray.Dataset
        The updated L2 dataset with species intensities calculated.

    Raises
    ------
    ValueError
        If a species with efficiency data is not present in the dataset.
    """
    # Select the relevant positions from the geometric factors
    # TODO revisit gfactor calculation. For pickup ions, only position 0 is used
    # Eventually, the CoDICE team wants to standardize this.
    if species_list == LO_SW_PICKUP_ION_SPECIES_VARIABLE_NAMES:
        geometric_factors = geometric_factors.isel(inst_az=[0])
    else:
        geometric_factors = geometric_factors.isel(inst_az=positions)

    if average_across_positions:
        # take the mean geometric factor across positions
        geometric_factors = geometric_factors.mean(dim="inst_az")
        scalar = len(positions)
    else:
        scalar = 1

    # intensity = species_rate / (gm * eff * esa_step) for position and spin angle
    for species in species_list:
        # Shape: (esa_step, inst_az)
        species_eff = get_species_efficiency(species, efficiency).isel(
            inst_az=positions
        )
        if species_eff.size == 0:
            logger.warning(f"No efficiency data found for species {species}. Skipping.")
            continue

        if average_across_positions:
            # Take the mean efficiency across positions
            species_eff = species_eff.mean(dim="inst_az")

        denominator = (
            scalar * geometric_factors * species_eff * dataset["energy_per_charge"]
        )

        if species not in dataset:
            raise ValueError(f"Species {species} not found in dataset.")

        # Only replace the data with calculated intensity to keep the attributes
        dataset[species].data = (dataset[species] / denominator).data

        # Also calculate uncertainty if available
        species_uncertainty = f"unc_{species}"
        if species_uncertainty not in dataset:
            logger.warning(
                f"Uncertainty {species_uncertainty} not found in dataset."
                f" Filling with NaNS."
            )
            # Give the NaN fill the same dims and shape as the species variable.
            # Assigning a bare 1-D array would instead create a new dimension
            # named after the uncertainty variable.
            dataset[species_uncertainty] = (
                dataset[species].dims,
                np.full(dataset[species].shape, np.nan),
            )
        else:
            dataset[species_uncertainty].data = (
                dataset[species_uncertainty] / denominator
            ).data

    return dataset

def process_lo_species_intensity(
    dataset: xr.Dataset,
    species_list: list,
    geometric_factors: xr.DataArray,
    efficiency: pd.DataFrame,
    positions: list,
) -> xr.Dataset:
    """
    Convert a lo-species L2 dataset to species intensities.

    Intensities are computed with geometric factors and efficiencies averaged
    across ``positions``, the CDF variable attributes are refreshed, and every
    sample taken during the RGFO half spin is set to NaN.

    Parameters
    ----------
    dataset : xarray.Dataset
        The L2 dataset to process.
    species_list : list
        List of species variable names to calculate intensity.
    geometric_factors : xarray.DataArray
        The geometric factors array; must have an ``inst_az`` dimension.
    efficiency : pandas.DataFrame
        The efficiency lookup table.
    positions : list
        A list of position indices to select from the geometric factor and
        efficiency lookup tables.

    Returns
    -------
    xarray.Dataset
        The updated L2 dataset with species intensities calculated.
    """
    dataset = calculate_intensity(
        dataset,
        species_list,
        geometric_factors,
        efficiency,
        positions,
        average_across_positions=True,
    )

    cdf_attrs = ImapCdfAttributes()
    cdf_attrs.add_instrument_variable_attrs("codice", "l2-lo-species")

    # Pick the attribute templates that match the requested positions
    if positions == SOLAR_WIND_POSITIONS:
        attr_keys = ("lo-sw-species-attrs", "lo-sw-species-unc-attrs")
    elif positions == PUI_POSITIONS:
        attr_keys = ("lo-pui-species-attrs", "lo-pui-species-unc-attrs")
    else:
        attr_keys = ("lo-species-attrs", "lo-species-unc-attrs")
    species_attrs = cdf_attrs.get_variable_attributes(attr_keys[0])
    unc_attrs = cdf_attrs.get_variable_attributes(attr_keys[1])

    # Handle each species together with its uncertainty variable
    variable_names = species_list + [f"unc_{var}" for var in species_list]

    for name in variable_names:
        template = unc_attrs if "unc" in name else species_attrs
        # Fill the {species} placeholder in the attribute template
        filled = apply_replacements_to_attrs(template, {"species": name})
        dataset[name].attrs.update(filled)

    # Since the RGFO mode is implemented within a half-spin at a given esa step
    # and spin sector and since the species data is summed over all spin sectors,
    # the data during this half spin cannot be de-convolved. Thus, the intensity
    # during the half_spin = RGFO_half_spin should be set to fill values.
    rgfo_half_spin = dataset.rgfo_half_spin.data[:, np.newaxis]
    on_rgfo_half_spin = dataset.half_spin_per_esa_step.data == rgfo_half_spin
    # Append a trailing axis so the mask matches the species data shape
    on_rgfo_half_spin = on_rgfo_half_spin[:, :, np.newaxis]

    for name in variable_names:
        dataset[name].data[on_rgfo_half_spin] = np.nan

    return dataset

def process_lo_angular_intensity(
    dataset: xr.Dataset,
    species_list: list,
    geometric_factors: xr.DataArray,
    efficiency: pd.DataFrame,
    positions: list,
) -> xr.Dataset:
    """
    Process the lo-angular L2 dataset to calculate angular intensities.

    Intensities are computed per position (no averaging), positions are mapped
    to elevation angles and summed per elevation angle, a ``spin_angle``
    coordinate is added, and the half-observed position is replicated across
    the spin sectors it did not observe.

    Parameters
    ----------
    dataset : xarray.Dataset
        The L2 dataset to process.
    species_list : list
        List of species variable names to calculate intensity.
    geometric_factors : xarray.DataArray
        The geometric factors array; must have an ``inst_az`` dimension.
    efficiency : pandas.DataFrame
        The efficiency lookup table.
    positions : list
        A list of position indices to select from the geometric factor and
        efficiency lookup tables. Must equal ``SW_POSITIONS`` or
        ``NSW_POSITIONS``.

    Returns
    -------
    xarray.Dataset
        The updated L2 dataset with angular intensities calculated.

    Raises
    ------
    ValueError
        If ``positions`` is neither ``SW_POSITIONS`` nor ``NSW_POSITIONS``.
    """
    # Calculate the angular intensities using the provided geometric factors and
    # efficiency.
    dataset = calculate_intensity(
        dataset,
        species_list,
        geometric_factors,
        efficiency,
        positions,
        average_across_positions=False,
    )

    # transform positions to elevation angles
    if positions == SW_POSITIONS:
        pos_to_el = LO_POSITION_TO_ELEVATION_ANGLE["sw"]
        position_index_to_adjust = 0
        direction = "Sunward"
    elif positions == NSW_POSITIONS:
        pos_to_el = LO_POSITION_TO_ELEVATION_ANGLE["nsw"]
        position_index_to_adjust = 9
        direction = "Non-Sunward"
    else:
        raise ValueError("Unknown positions for elevation angle mapping.")

    # Create a new coordinate for elevation_angle based on inst_az
    dataset = dataset.assign_coords(
        elevation_angle=(
            "inst_az",
            [pos_to_el[pos] for pos in dataset["inst_az"].data],
        )
    )

    # add uncertainties to species list
    species_list = species_list + [f"unc_{var}" for var in species_list]

    # Sum across positions sharing an elevation angle and restore the original
    # dimension order
    dataset_converted = (
        dataset[species_list]
        .groupby("elevation_angle")
        # One position should always contain zeros so sum is safe
        .sum(keep_attrs=True, skipna=False)
        # Restore original dimension order because groupby moves the grouped
        # dimension to the front
        .transpose("epoch", "esa_step", "spin_sector", "elevation_angle", ...)
    )

    # Create a new coordinate for spin angle based on spin_sector
    # Use equation from section 11.2.2 of algorithm document
    dataset = dataset.assign_coords(
        spin_angle=("spin_sector", dataset["spin_sector"].data * 15.0 + 7.5)
    )
    dataset = dataset.drop_vars(species_list).merge(dataset_converted)

    # Positions 0 and 10 only observe half of the 24 spins for each esa step.
    # To account for this, we replicate the counts observed in position 0 and 10
    # for each esa step to either spin angles 0-11 or 12-23, depending on the
    # pixel orientation (A/B). See section 11.2.2 of the CoDICE algorithm document
    # Use the variable "half_spin_per_esa_step" to determine the pixel
    # orientations. When the half spin number is even, the configuration is A and
    # when the half spin is odd, the configuration is B.
    # TODO handle when half_spin_per_esa_step changes in the middle of the dataset
    half_spin_per_esa_step = dataset["half_spin_per_esa_step"].data[0]
    # only consider valid half spin values
    valid_half_spin = half_spin_per_esa_step != HALF_SPIN_FILLVAL
    a_inds = np.nonzero(valid_half_spin & (half_spin_per_esa_step % 2 == 0))[0]
    b_inds = np.nonzero(valid_half_spin & (half_spin_per_esa_step % 2 == 1))[0]

    position_index = position_index_to_adjust

    # The spin sector index sets do not depend on the species, so compute once
    spin_sectors = dataset["spin_sector"].data
    spin_inds_1 = np.where(spin_sectors >= 12)[0]
    spin_inds_2 = np.where(spin_sectors < 12)[0]
    # if position_index is 9, swap the spin indices
    if position_index == 9:
        spin_inds_1, spin_inds_2 = spin_inds_2, spin_inds_1

    for species in species_list:
        # Read from a copy so the source values are not overwritten while
        # the destination sectors are being filled
        species_data = dataset[species].data.copy()
        # Assign the values to the correct positions and spin sectors
        dataset[species].values[
            :, a_inds[:, np.newaxis], spin_inds_1, position_index
        ] = species_data[:, a_inds[:, np.newaxis], spin_inds_2, position_index]
        dataset[species].values[
            :, b_inds[:, np.newaxis], spin_inds_2, position_index
        ] = species_data[:, b_inds[:, np.newaxis], spin_inds_1, position_index]

    cdf_attrs = ImapCdfAttributes()
    cdf_attrs.add_instrument_variable_attrs("codice", "l2-lo-angular")
    species_attrs = cdf_attrs.get_variable_attributes("lo-angular-attrs")
    unc_attrs = cdf_attrs.get_variable_attributes("lo-angular-unc-attrs")

    # update species attrs
    for species in species_list:
        attrs = unc_attrs if "unc" in species else species_attrs
        # Replace {species} and {direction} in attrs
        attrs = apply_replacements_to_attrs(
            attrs, {"species": species, "direction": direction}
        )
        dataset[species].attrs.update(attrs)

    # make sure elevation_angle is a coordinate and has the right attrs
    dataset["elevation_angle"].attrs.update(
        cdf_attrs.get_variable_attributes("elevation_angle", check_schema=False)
    )
    dataset["elevation_angle_label"] = xr.DataArray(
        dataset["elevation_angle"].data.astype(str),
        dims=("elevation_angle",),
        attrs=cdf_attrs.get_variable_attributes(
            "elevation_angle_label", check_schema=False
        ),
    )

    # update spin angle attributes
    dataset["spin_angle"].attrs = cdf_attrs.get_variable_attributes(
        "spin_angle", check_schema=False
    )
    # update spin sector attributes
    dataset["spin_sector"].attrs = cdf_attrs.get_variable_attributes(
        "spin_sector", check_schema=False
    )

    return dataset

def process_hi_omni(dependencies: ProcessingInputCollection) -> xr.Dataset:
    """
    Process the hi-omni L1B dataset to calculate omni-directional intensities.

    See section 11.1.3 of the CoDICE algorithm document for details.

    The formula applied to each species (and to its ``unc_{species}``
    uncertainty variable, when present) is::

        l1b species data / (geometric_factor * efficiency * energy_passband)

    Geometric factor is read from the row labeled ``GF`` in the efficiency CSV
    file (last column of that row).
    Efficiency is read from the ``average_efficiency`` column of the same CSV
    file for each species and energy bin.
    Energy passband is calculated from the L1B variables
    ``energy_{species}_minus + energy_{species}_plus``.

    Parameters
    ----------
    dependencies : ProcessingInputCollection
        The collection of processing input files.

    Returns
    -------
    xarray.Dataset
        The updated L2 dataset with omni-directional intensities calculated.

    Notes
    -----
    NOTE(review): the algorithm document formula also lists a
    ``number_of_ssd`` factor (12). No separate SSD factor is applied in this
    function; presumably it is already accounted for in the inputs - confirm
    against the algorithm document.
    """
    l1b_file = dependencies.get_file_paths(descriptor="hi-omni")[0]
    l1b_dataset = load_cdf(l1b_file)

    # Read the efficiencies data from the CSV file
    efficiencies_file = dependencies.get_file_paths(
        descriptor="l2-hi-omni-efficiency"
    )[0]
    efficiencies_df = pd.read_csv(efficiencies_file)

    # Each species in the omni product has its own energy axis, e.g.
    #   h  - (epoch, 15)
    #   c  - (epoch, 18)
    #   uh - (epoch, 5)
    # so the intensities have to be calculated one species at a time.

    # Read geometric factor. It is labeled as GF in the CSV file
    geometric_factor = efficiencies_df[efficiencies_df["species"] == "GF"].values[0][-1]

    for species in HI_OMNI_VARIABLE_NAMES:
        # replace '_' with '-' to match CSV species naming
        species_csv_name = species.replace("_", "-")
        species_data = efficiencies_df[efficiencies_df["species"] == species_csv_name]
        # Leading axis of length one so the per-bin values broadcast over epoch
        species_efficiencies = species_data["average_efficiency"].values[np.newaxis, :]

        # Calculate energy passband from L1B data
        energy_passbands = (
            l1b_dataset[f"energy_{species}_plus"]
            + l1b_dataset[f"energy_{species}_minus"]
        ).values[np.newaxis, :]

        # The same divisor converts both the data and its uncertainty
        divisor = geometric_factor * species_efficiencies * energy_passbands

        # Store by replacing existing species data with omni-directional
        # intensities
        l1b_dataset[species].values = l1b_dataset[species] / divisor

        # Calculate uncertainty if available
        species_uncertainty = f"unc_{species}"
        if species_uncertainty in l1b_dataset:
            l1b_dataset[species_uncertainty].values = (
                l1b_dataset[species_uncertainty] / divisor
            )

    # TODO: this may go away once Joey and I fix L1B CDF
    # Update global CDF attributes
    cdf_attrs = ImapCdfAttributes()
    cdf_attrs.add_instrument_global_attrs("codice")
    cdf_attrs.add_instrument_variable_attrs("codice", "l2-hi-omni")
    l1b_dataset.attrs = cdf_attrs.get_global_attributes("imap_codice_l2_hi-omni")

    # TODO: ask Joey to add attrs for epoch_delta_plus and epoch_delta_minus
    # and update dimension to be 'epoch' in L1B data
    # Every data variable gets its L2 attributes the same way; the schema
    # check is disabled for all of them.
    for variable in l1b_dataset.data_vars:
        l1b_dataset[variable].attrs = cdf_attrs.get_variable_attributes(
            variable, check_schema=False
        )

    # Energy axes that are promoted to coordinates, each with a string label
    # companion. Kept as an explicit, ordered list so the coordinate order of
    # the output dataset does not depend on any other constant.
    energy_coord_names = (
        "energy_h",
        "energy_he3",
        "energy_he4",
        "energy_c",
        "energy_o",
        "energy_ne_mg_si",
        "energy_fe",
        "energy_uh",
        "energy_junk",
    )

    # Add these new coordinates
    new_coords = {}
    for coord_name in energy_coord_names:
        label_name = f"{coord_name}_label"
        coord_values = l1b_dataset[coord_name].values
        new_coords[coord_name] = xr.DataArray(
            coord_values,
            dims=(coord_name,),
            attrs=cdf_attrs.get_variable_attributes(coord_name, check_schema=False),
        )
        new_coords[label_name] = xr.DataArray(
            coord_values.astype(str),
            dims=(coord_name,),
            attrs=cdf_attrs.get_variable_attributes(label_name, check_schema=False),
        )

    new_coords["epoch"] = xr.DataArray(
        l1b_dataset["epoch"].data,
        dims=("epoch",),
        attrs=cdf_attrs.get_variable_attributes("epoch", check_schema=False),
    )
    new_coords["epoch_delta_plus"] = l1b_dataset["epoch_delta_plus"]
    new_coords["epoch_delta_minus"] = l1b_dataset["epoch_delta_minus"]

    l1b_dataset = l1b_dataset.assign_coords(new_coords)

    # Link epoch to its delta variables only after assign_coords: the epoch
    # coordinate is replaced above by a new DataArray carrying the L2
    # attributes, so anything set on the old epoch variable would be dropped.
    l1b_dataset["epoch"].attrs["DELTA_MINUS_VAR"] = "epoch_delta_minus"
    l1b_dataset["epoch"].attrs["DELTA_PLUS_VAR"] = "epoch_delta_plus"

    return l1b_dataset

def process_hi_sectored(dependencies: ProcessingInputCollection) -> xr.Dataset:

"""

Process the hi-omni L1B dataset to calculate omni-directional intensities.

See section 11.1.2 of the CoDICE algorithm document for details.

The formula for omni-directional intensities is::

l1b species data / (geometric_factor * efficiency * energy_passband)

Geometric factor is constant for all species and is 0.013.

Efficiency is provided in a CSV file for each species and energy bin and

position.

Energy passband is calculated from energy_bin_minus + energy_bin_plus

Parameters

----------

dependencies : ProcessingInputCollection

The collection of processing input files.

Returns

-------

xarray.Dataset

The updated L2 dataset with omni-directional intensities calculated.

"""

file_path = dependencies.get_file_paths(descriptor="hi-sectored")[0]

l1b_dataset = load_cdf(file_path)

# Update global CDF attributes

cdf_attrs = ImapCdfAttributes()

cdf_attrs.add_instrument_global_attrs("codice")

cdf_attrs.add_instrument_variable_attrs("codice", "l2-hi-sectored")

# Overwrite L1B variable attributes with L2 variable attributes

l2_dataset = xr.Dataset(

coords={

"spin_sector": l1b_dataset["spin_sector"],

"spin_sector_label": xr.DataArray(

l1b_dataset["spin_sector"].values.astype(str),

dims=("spin_sector",),

attrs=cdf_attrs.get_variable_attributes(

"spin_sector_label", check_schema=False

"energy_h": xr.DataArray(

l1b_dataset["energy_h"].values,

dims=("energy_h",),

attrs=cdf_attrs.get_variable_attributes("energy_h", check_schema=False),

"energy_h_label": xr.DataArray(

l1b_dataset["energy_h"].values.astype(str),

dims=("energy_h",),

attrs=cdf_attrs.get_variable_attributes(

"energy_h_label", check_schema=False

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

codice_l2.py

Latest commit

History

codice_l2.py

File metadata and controls