Skip to content

Commit 5defcfa

Browse files
committed
Extra error checks added to ITS data decoding
1 parent 0fa923c commit 5defcfa

5 files changed

Lines changed: 127 additions & 31 deletions

File tree

Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/AlpideCoder.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
#include <string>
1919
#include <cstdint>
2020
#include <FairLogger.h>
21-
21+
#include <iostream>
2222
#include "PayLoadCont.h"
2323

2424
#include "ITSMFTReconstruction/PixelData.h"
@@ -230,7 +230,9 @@ class AlpideCoder
230230
buffer.clear(); // 0 padding reached (end of the cable data), no point in continuing
231231
break;
232232
}
233-
return unexpectedEOF("Unknown word"); // either error
233+
std::stringstream stream;
234+
stream << "Unknown word 0x" << std::hex << int(dataC) << " [mode = 0x" << int(expectInp) << "]";
235+
return unexpectedEOF(stream.str().c_str()); // error
234236
}
235237

236238
return chipData.getData().size();

Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingITS.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ class ChipMappingITS
6565
link = (feeID >> 8) & 0x3;
6666
}
6767

68+
///< impose user defined FEEId -> ruSW (staveID) conversion, to be used only for forced decoding of corrupted data
69+
void imposeFEEId2RUSW(uint16_t feeID, uint16_t ruSW);
70+
6871
///< modify linkID field in FEEId
6972
uint16_t modifyLinkInFEEId(uint16_t feeID, uint16_t linkID) const
7073
{

Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/ChipMappingMFT.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ class ChipMappingMFT
9696
return ChipMappingData[chipID].module;
9797
}
9898

99+
// Stub: always returns 0, whatever RU type is passed; the ruType argument is not used.
int getCablesOnRUType(int ruType) const
{
  return 0;
}
100+
99101
int getNChipsInModule(int modID) const
100102
{
101103
return ModuleMappingData[modID].nChips;
@@ -116,6 +118,9 @@ class ChipMappingMFT
116118
return ModuleMappingData[ChipMappingData[chipID].module].layer;
117119
}
118120

121+
///< impose user defined FEEId -> ruSW (staveID) conversion, to be used only for forced decoding of corrupted data
122+
void imposeFEEId2RUSW(uint16_t, uint16_t)
{
  // intentionally empty: both arguments are ignored and nothing is modified
}
123+
119124
private:
120125
int invalid() const;
121126
static constexpr int NModules = 280;

Detectors/ITSMFT/common/reconstruction/include/ITSMFTReconstruction/RawPixelReader.h

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ struct GBTLinkDecodingStat {
6464
ErrNoDataForActiveLane, // no data was seen for lane (which was not in timeout)
6565
ErrIBChipLaneMismatch, // chipID (on module) was different from the lane ID on the IB stave
6666
ErrCableDataHeadWrong, // cable data does not start with chip header or empty chip
67+
ErrInvalidActiveLanes, // active lanes pattern conflicts with expected for given RU type
6768
NErrorsDefined
6869
};
6970
uint32_t ruLinkID = 0; // Link ID within RU
@@ -117,7 +118,8 @@ struct GBTLinkDecodingStat {
117118
"Data was received for stopped lane", // ErrDataForStoppedLane
118119
"No data was seen for lane (which was not in timeout)", // ErrNoDataForActiveLane
119120
"ChipID (on module) was different from the lane ID on the IB stave", // ErrIBChipLaneMismatch
120-
"Cable data does not start with chip header or empty chip" // ErrCableDataHeadWrong
121+
"Cable data does not start with chip header or empty chip", // ErrCableDataHeadWrong
122+
"Active lanes pattern conflicts with expected for given RU type" // ErrInvalidActiveLanes
121123
};
122124

123125
ClassDefNV(GBTLinkDecodingStat, 1);
@@ -126,12 +128,18 @@ struct GBTLinkDecodingStat {
126128
constexpr std::array<std::string_view, GBTLinkDecodingStat::NErrorsDefined> GBTLinkDecodingStat::ErrNames;
127129

128130
struct RawDecodingStat {
131+
// Error codes counted at the level of the whole raw decoding (not per GBT link).
// The enumerators are used as indices into the errorCounts array and the ErrNames table.
enum DecErrors : int {
132+
ErrInvalidFEEId, // RDH provided invalid FEEId
133+
NErrorsDefined // number of error codes defined, used as the size of errorCounts and ErrNames
134+
};
135+
129136
using ULL = unsigned long long;
130137
uint64_t nPagesProcessed = 0; // total number of pages processed
131138
uint64_t nRUsProcessed = 0; // total number of RUs processed (1 RU may take a few pages)
132139
uint64_t nBytesProcessed = 0; // total number of bytes (rdh->memorySize) processed
133140
uint64_t nNonEmptyChips = 0; // number of non-empty chips found
134141
uint64_t nHitsDecoded = 0; // number of hits found
142+
std::array<int, NErrorsDefined> errorCounts = {}; // error counters
135143

136144
RawDecodingStat() = default;
137145

@@ -142,15 +150,30 @@ struct RawDecodingStat {
142150
nBytesProcessed = 0;
143151
nNonEmptyChips = 0;
144152
nHitsDecoded = 0;
153+
errorCounts.fill(0);
145154
}
146155

147-
// Print the decoding summary to stdout: processed bytes/RUs/pages, decoded hits and
// non-empty chips, the total number of decoding errors and the per-error counters.
// skipEmpty: when true (the default) only error counters with a non-zero value are listed,
//            when false every defined error is listed.
// Note: the parameterless signature directly below is the line removed by this diff,
// the skipEmpty version after it is its replacement.
void print() const
156+
void print(bool skipEmpty = true) const
148157
{
149158
printf("\nDecoding statistics\n");
150159
// ULL is unsigned long long, so the casts match the %llu conversions
printf("%llu bytes for %llu RUs processed in %llu pages\n", (ULL)nBytesProcessed, (ULL)nRUsProcessed, (ULL)nPagesProcessed);
151160
printf("%llu hits found in %llu non-empty chips\n", (ULL)nHitsDecoded, (ULL)nNonEmptyChips);
161+
int nErr = 0; // sum over all error counters
162+
for (int i = NErrorsDefined; i--;) {
163+
nErr += errorCounts[i];
164+
}
165+
printf("Decoding errors: %d\n", nErr);
166+
for (int i = 0; i < NErrorsDefined; i++) {
167+
if (!skipEmpty || errorCounts[i]) { // list zero counters only if explicitly requested
168+
// ErrNames entries are built from string literals, so data() is null-terminated here
printf("%-70s: %d\n", ErrNames[i].data(), errorCounts[i]);
169+
}
170+
}
152171
}
153172

173+
// Human-readable description of every DecErrors code, indexed by the enum value.
// These strings are printed by print() and streamed into the error log, so spelling matters
// (the original entry read "cointains").
static constexpr std::array<std::string_view, NErrorsDefined> ErrNames = {
  "RDH contains invalid FEEID" // ErrInvalidFEEId
};
176+
154177
ClassDefNV(RawDecodingStat, 1);
155178
};
156179

@@ -317,7 +340,7 @@ class RawPixelReader : public PixelReader
317340
ChipInfo chInfo;
318341
UShort_t curChipID = 0xffff; // currently processed SW chip id
319342
mInteractionRecord = bcData;
320-
ruSWMax = (ruSWMax < uint8_t(MAP.getNRUs())) ? ruSWMax : MAP.getNRUs() - 1;
343+
ruSWMax = (ruSWMax < uint8_t(mMAP.getNRUs())) ? ruSWMax : mMAP.getNRUs() - 1;
321344

322345
if (mNRUs < int(ruSWMax) - ruSWMin) { // book containers if needed
323346
for (uint8_t ru = ruSWMin; ru <= ruSWMax; ru++) {
@@ -330,7 +353,7 @@ class RawPixelReader : public PixelReader
330353
if (!nLinks) {
331354
LOG(INFO) << "Imposing single link readout for RU " << int(ru);
332355
ruData.links[0] = std::make_unique<GBTLink>();
333-
ruData.links[0]->lanes = MAP.getCablesOnRUType(ruData.ruInfo->ruType);
356+
ruData.links[0]->lanes = mMAP.getCablesOnRUType(ruData.ruInfo->ruType);
334357
mNLinks++;
335358
}
336359
}
@@ -340,7 +363,7 @@ class RawPixelReader : public PixelReader
340363
for (int id = from; id < last; id++) {
341364
const auto& dig = digiVec[id];
342365
if (curChipID != dig.getChipIndex()) {
343-
MAP.getChipInfoSW(dig.getChipIndex(), chInfo);
366+
mMAP.getChipInfoSW(dig.getChipIndex(), chInfo);
344367
if (chInfo.ru < ruSWMin || chInfo.ru > ruSWMax) { // ignore this chip?
345368
continue;
346369
}
@@ -356,7 +379,7 @@ class RawPixelReader : public PixelReader
356379
int minPages = 0xffffff;
357380
for (mCurRUDecodeID = ruSWMin; mCurRUDecodeID <= int(ruSWMax); mCurRUDecodeID++) {
358381
curRUDecode = &mRUDecodeVec[mCurRUDecodeID];
359-
uint16_t next2Proc = 0, nchTot = MAP.getNChipsOnRUType(curRUDecode->ruInfo->ruType);
382+
uint16_t next2Proc = 0, nchTot = mMAP.getNChipsOnRUType(curRUDecode->ruInfo->ruType);
360383
for (int ich = 0; ich < curRUDecode->nChipsFired; ich++) {
361384
auto& chipData = curRUDecode->chipsData[ich];
362385
convertEmptyChips(next2Proc, chipData.getChipID()); // if needed store EmptyChip flags
@@ -381,7 +404,7 @@ class RawPixelReader : public PixelReader
381404

382405
auto& ruData = mRUDecodeVec[mCurRUDecodeID]; // current RU container
383406
// fetch info of the chip with chipData->getChipID() ID within the RU
384-
const auto& chip = *MAP.getChipOnRUInfo(ruData.ruInfo->ruType, chipData.getChipID());
407+
const auto& chip = *mMAP.getChipOnRUInfo(ruData.ruInfo->ruType, chipData.getChipID());
385408
ruData.cableHWID[chip.cableSW] = chip.cableHW; // register the cable HW ID
386409

387410
auto& pixels = chipData.getData();
@@ -403,7 +426,7 @@ class RawPixelReader : public PixelReader
403426
// add empty chip words to respective cable's buffers for all chips of the current RU container
404427
auto& ruData = mRUDecodeVec[mCurRUDecodeID]; // current RU container
405428
for (int chipIDSW = fromChip; chipIDSW < uptoChip; chipIDSW++) { // flag chips w/o data
406-
const auto& chip = *MAP.getChipOnRUInfo(ruData.ruInfo->ruType, chipIDSW);
429+
const auto& chip = *mMAP.getChipOnRUInfo(ruData.ruInfo->ruType, chipIDSW);
407430
ruData.cableHWID[chip.cableSW] = chip.cableHW; // register the cable HW ID
408431
ruData.cableData[chip.cableSW].ensureFreeCapacity(100);
409432
mCoder.addEmptyChip(ruData.cableData[chip.cableSW], chip.chipOnModuleHW, mInteractionRecord.bc);
@@ -423,7 +446,7 @@ class RawPixelReader : public PixelReader
423446
rdh.triggerOrbit = rdh.heartbeatOrbit = mInteractionRecord.orbit;
424447
rdh.triggerBC = rdh.heartbeatBC = mInteractionRecord.bc;
425448
rdh.triggerType = o2::trigger::PhT; // ??
426-
rdh.detectorField = MAP.getRUDetectorField();
449+
rdh.detectorField = mMAP.getRUDetectorField();
427450
rdh.blockLength = 0xffff; // ITS keeps this dummy
428451

429452
int maxGBTWordsPerPacket = (MaxGBTPacketBytes - rdh.headerSize) / o2::itsmft::GBTPaddedWordLength - 2;
@@ -442,7 +465,7 @@ class RawPixelReader : public PixelReader
442465
}
443466
}
444467
// move data in padded GBT words from cable buffers to link buffers
445-
rdh.feeId = MAP.RUSW2FEEId(ruData.ruInfo->idSW, il); // write on link 0 always
468+
rdh.feeId = mMAP.RUSW2FEEId(ruData.ruInfo->idSW, il); // write on link 0 always
446469
rdh.linkID = il;
447470
rdh.pageCnt = 0;
448471
rdh.stop = 0;
@@ -482,7 +505,7 @@ class RawPixelReader : public PixelReader
482505
int gbtWordStart = link->data.getSize(); // beginning of the current GBT word in the link
483506
link->data.addFast(cableData.getPtr(), nb); // fill payload of cable
484507
link->data.addFast(zero16, mGBTWordSize - nb); // fill the rest of the GBT word by 0
485-
link->data[gbtWordStart + 9] = MAP.getGBTHeaderRUType(ruData.ruInfo->ruType, ruData.cableHWID[icab]); // set cable flag
508+
link->data[gbtWordStart + 9] = mMAP.getGBTHeaderRUType(ruData.ruInfo->ruType, ruData.cableHWID[icab]); // set cable flag
486509
cableData.setPtr(cableData.getPtr() + nb);
487510
nGBTWordsNeeded--;
488511
if (mVerbose) {
@@ -549,7 +572,7 @@ class RawPixelReader : public PixelReader
549572
// return total number of pages flushed
550573

551574
int totPages = 0;
552-
for (int ru = 0; ru < MAP.getNRUs(); ru++) {
575+
for (int ru = 0; ru < mMAP.getNRUs(); ru++) {
553576
auto* ruData = getRUDecode(ru);
554577
if (!ruData) {
555578
continue;
@@ -620,12 +643,30 @@ class RawPixelReader : public PixelReader
620643
if (mVerbose) {
621644
printRDH(*rdh);
622645
}
623-
int ruIDSW = getMapping().FEEId2RUSW(rdh->feeId);
646+
int ruIDSW = mMAP.FEEId2RUSW(rdh->feeId);
647+
648+
#ifdef _RAW_READER_ERROR_CHECKS_
649+
if (ruIDSW >= mMAP.getNRUs()) {
650+
mDecodingStat.errorCounts[RawDecodingStat::ErrInvalidFEEId]++;
651+
LOG(ERROR) << mDecodingStat.ErrNames[RawDecodingStat::ErrInvalidFEEId] << " : " << rdh->feeId << ", skipping CRU page";
652+
mDecodingStat.nBytesProcessed += rdh->memorySize;
653+
mDecodingStat.nPagesProcessed++;
654+
655+
ptr += rdh->offsetToNext;
656+
buffer.setPtr(ptr);
657+
if (buffer.getUnusedSize() < MaxGBTPacketBytes) {
658+
nRead += loadInput(buffer); // update
659+
ptr = buffer.getPtr(); // pointer might have been changed
660+
}
661+
continue;
662+
}
663+
#endif
664+
624665
auto& ruDecode = getCreateRUDecode(ruIDSW);
625666

626667
bool newTrigger = true; // check if we see new trigger
627668
uint16_t lr, ruOnLr, linkIDinRU;
628-
getMapping().expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
669+
mMAP.expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
629670
auto link = ruDecode.links[linkIDinRU].get();
630671
if (link) { // was there any data seen on this link before?
631672
const auto rdhPrev = reinterpret_cast<o2::header::RAWDataHeader*>(link->data.getEnd() - link->lastPageSize); // last stored RDH
@@ -816,13 +857,22 @@ class RawPixelReader : public PixelReader
816857
}
817858
#endif
818859

819-
int ruIDSW = MAP.FEEId2RUSW(rdh->feeId);
820-
if (ruIDSW != ruDecData.ruInfo->idSW) {
860+
int ruIDSW = mMAP.FEEId2RUSW(rdh->feeId);
861+
#ifdef _RAW_READER_ERROR_CHECKS_
862+
if (ruIDSW >= mMAP.getNRUs()) {
863+
mDecodingStat.errorCounts[RawDecodingStat::ErrInvalidFEEId]++;
864+
LOG(ERROR) << mDecodingStat.ErrNames[RawDecodingStat::ErrInvalidFEEId] << " : " << rdh->feeId << ", skipping CRU page";
865+
raw += rdh->offsetToNext;
866+
return raw;
867+
}
868+
869+
if (ruIDSW != ruDecData.ruInfo->idSW) { // should not happen with cached data
821870
LOG(ERROR) << "RDG RU IDSW " << ruIDSW << " differs from expected " << ruDecData.ruInfo->idSW;
822871
}
872+
#endif
823873

824874
uint16_t lr, ruOnLr, linkIDinRU;
825-
getMapping().expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
875+
mMAP.expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
826876
auto& ruStat = ruDecData.links[linkIDinRU]->statistics;
827877
ruStat.nPackets++;
828878

@@ -861,6 +911,15 @@ class RawPixelReader : public PixelReader
861911

862912
ruStat.lanesActive = gbtH->getLanes(); // TODO do we need to update this for every page?
863913

914+
if (~(mMAP.getCablesOnRUType(ruDecData.ruInfo->ruType)) & ruStat.lanesActive) { // are there wrong lanes?
915+
std::bitset<32> expectL(mMAP.getCablesOnRUType(ruDecData.ruInfo->ruType)), gotL(ruStat.lanesActive);
916+
LOG(ERROR) << "FEE#0x" << std::hex << rdh->feeId << std::dec
917+
<< " Active lanes pattern " << gotL << " conflicts with expected " << expectL << " for given RU type, skip page";
918+
ruStat.errorCounts[GBTLinkDecodingStat::ErrInvalidActiveLanes]++;
919+
raw = ((uint8_t*)rdh) + rdh->offsetToNext; // jump to the next packet
920+
return raw;
921+
}
922+
864923
if (!rdh->pageCnt) { // reset flags
865924
ruStat.lanesStop = 0;
866925
ruStat.lanesWithData = 0;
@@ -882,7 +941,7 @@ class RawPixelReader : public PixelReader
882941
}
883942

884943
int cableHW = gbtD->getCableID();
885-
int cableSW = MAP.cableHW2SW(ruDecData.ruInfo->ruType, cableHW);
944+
int cableSW = mMAP.cableHW2SW(ruDecData.ruInfo->ruType, cableHW);
886945
ruDecData.cableData[cableSW].add(gbtD->getW8(), 9);
887946
ruDecData.cableHWID[cableSW] = cableHW;
888947

@@ -1023,12 +1082,21 @@ class RawPixelReader : public PixelReader
10231082
aborted = true;
10241083
return raw;
10251084
}
1085+
int ruIDSWD = mMAP.FEEId2RUSW(rdh->feeId);
1086+
if (ruIDSWD >= mMAP.getNRUs()) {
1087+
mDecodingStat.errorCounts[RawDecodingStat::ErrInvalidFEEId]++;
1088+
LOG(ERROR) << mDecodingStat.ErrNames[RawDecodingStat::ErrInvalidFEEId] << " : " << rdh->feeId << ", skipping CRU page";
1089+
mDecodingStat.nBytesProcessed += rdh->memorySize;
1090+
mDecodingStat.nPagesProcessed++;
1091+
raw += rdh->offsetToNext;
1092+
return raw;
1093+
}
10261094
#endif
10271095
uint16_t lr, ruOnLr, linkIDinRU;
1028-
getMapping().expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
1029-
int ruIDSW = MAP.FEEId2RUSW(rdh->feeId);
1096+
mMAP.expandFEEId(rdh->feeId, lr, ruOnLr, linkIDinRU);
1097+
int ruIDSW = mMAP.FEEId2RUSW(rdh->feeId);
10301098
auto& ruDecode = getCreateRUDecode(ruIDSW);
1031-
auto ruInfo = MAP.getRUInfoSW(ruIDSW);
1099+
auto ruInfo = mMAP.getRUInfoSW(ruIDSW);
10321100

10331101
if (!ruDecode.links[linkIDinRU].get()) {
10341102
ruDecode.links[linkIDinRU] = std::make_unique<GBTLink>();
@@ -1116,7 +1184,7 @@ class RawPixelReader : public PixelReader
11161184
}
11171185

11181186
int cableHW = gbtD->getCableID();
1119-
int cableSW = MAP.cableHW2SW(ruInfo->ruType, cableHW);
1187+
int cableSW = mMAP.cableHW2SW(ruInfo->ruType, cableHW);
11201188

11211189
outBuffer.addFast(reinterpret_cast<const uint8_t*>(gbtD), mGBTWordSize); // save gbt word w/o 128b padding
11221190

@@ -1246,7 +1314,8 @@ class RawPixelReader : public PixelReader
12461314
// make sure the lane data starts with chip header or empty chip
12471315
uint8_t h;
12481316
if (cableData.current(h) && !mCoder.isChipHeaderOrEmpty(h)) {
1249-
LOG(ERROR) << "FEE#" << decData.ruInfo->idHW << " cable " << icab << " data does not start with ChipHeader or ChipEmpty";
1317+
LOG(ERROR) << "FEE#0x" << std::hex << decData.ruInfo->idHW << std::dec << " cable " << icab
1318+
<< " data does not start with ChipHeader or ChipEmpty";
12501319
ruStat.errorCounts[GBTLinkDecodingStat::ErrCableDataHeadWrong]++;
12511320
}
12521321
#endif
@@ -1263,7 +1332,7 @@ class RawPixelReader : public PixelReader
12631332
}
12641333
#endif
12651334
// convert HW chip id within the module to absolute chip id
1266-
chipData->setChipID(MAP.getGlobalChipID(chipData->getChipID(), decData.cableHWID[icab], *decData.ruInfo));
1335+
chipData->setChipID(mMAP.getGlobalChipID(chipData->getChipID(), decData.cableHWID[icab], *decData.ruInfo));
12671336
chipData->setInteractionRecord(mInteractionRecord);
12681337
chipData->setTrigger(mTrigger);
12691338
mDecodingStat.nNonEmptyChips++;
@@ -1359,7 +1428,7 @@ class RawPixelReader : public PixelReader
13591428
// get statistics of FEE with given HW id
13601429
// Return the link decoding statistics for the FEE identified by its hardware ID.
// idHW:   hardware FEE ID, converted to the software RU index with the mapping's FEEId2RUSW
// ruLink: link selector, passed unchanged to getGBTLinkDecodingStatSW
// Returns whatever getGBTLinkDecodingStatSW returns for the converted index.
// Note: the MAP line below is the one removed by this diff, the mMAP line is its replacement.
const GBTLinkDecodingStat* getGBTLinkDecodingStatHW(uint16_t idHW, int ruLink) const
13611430
{
1362-
int idsw = MAP.FEEId2RUSW(idHW);
1431+
int idsw = mMAP.FEEId2RUSW(idHW);
13631432
// NOTE(review): presumably FEEId2RUSW yields 0xffff for an unknown FEE ID - confirm against the mapping classes.
// The check is only active when assertions are enabled (NDEBUG not defined).
assert(idsw != 0xffff);
13641433
return getGBTLinkDecodingStatSW(idsw, ruLink);
13651434
}
@@ -1374,7 +1443,7 @@ class RawPixelReader : public PixelReader
13741443
// set the verbosity level (stored in mVerbose)
void setVerbosity(int v)
{
  mVerbose = v;
}
13751444
// get the current verbosity level (value of mVerbose)
int getVerbosity() const
{
  return mVerbose;
}
13761445

1377-
Mapping& getMapping() { return MAP; }
1446+
// give mutable access to the mapping object owned by the reader
Mapping& getMapping()
{
  return mMAP;
}
13781447

13791448
// get currently processed RU container
13801449
const RUDecodeData* getCurrRUDecodeData() const
{
  // a negative index means that no RU container is currently selected
  if (mCurRUDecodeID < 0) {
    return nullptr;
  }
  return &mRUDecodeVec[mCurRUDecodeID];
}
@@ -1401,10 +1470,10 @@ class RawPixelReader : public PixelReader
14011470
// get RU decode container for RU with given SW ID, if does not exist, create it
14021471
RUDecodeData& getCreateRUDecode(int ruSW)
14031472
{
1404-
assert(ruSW < MAP.getNRUs());
1473+
assert(ruSW < mMAP.getNRUs());
14051474
if (mRUEntry[ruSW] < 0) {
14061475
mRUEntry[ruSW] = mNRUs++;
1407-
mRUDecodeVec[mRUEntry[ruSW]].ruInfo = MAP.getRUInfoSW(ruSW); // info on the stave/RU
1476+
mRUDecodeVec[mRUEntry[ruSW]].ruInfo = mMAP.getRUInfoSW(ruSW); // info on the stave/RU
14081477
LOG(INFO) << "Defining container for RU " << ruSW << " at slot " << mRUEntry[ruSW];
14091478
}
14101479
return mRUDecodeVec[mRUEntry[ruSW]];
@@ -1413,7 +1482,7 @@ class RawPixelReader : public PixelReader
14131482
private:
14141483
std::ifstream mIOFile;
14151484
Coder mCoder;
1416-
Mapping MAP;
1485+
Mapping mMAP;
14171486
int mVerbose = 0; //! verbosity level
14181487
int mCurRUDecodeID = -1; //! index of currently processed RUDecode container
14191488

0 commit comments

Comments
 (0)