Skip to content

Commit b6c90b3

Browse files
sevdokim authored and davidrohr committed
CPV: mute raw decoder error reporting for 10 minutes if it reports more than 10 errors per minute
1 parent e537af1 commit b6c90b3

4 files changed

Lines changed: 110 additions & 39 deletions

File tree

Detectors/CPV/reconstruction/include/CPVReconstruction/RawDecoder.h

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,7 @@ namespace o2
2424
namespace cpv
2525
{
2626

27-
class RawDecoderError
28-
{
29-
public:
27+
struct RawDecoderError {
3028
RawDecoderError() = default; //Constructors for vector::emplace_back methods
3129
RawDecoderError(short c, short d, short g, short p, RawErrorType_t e) : ccId(c), dil(d), gas(g), pad(p), errortype(e) {}
3230
RawDecoderError(const RawDecoderError& e) = default;
@@ -37,7 +35,6 @@ class RawDecoderError
3735
short gas;
3836
short pad;
3937
RawErrorType_t errortype;
40-
ClassDefNV(RawDecoderError, 1);
4138
};
4239

4340
union AddressCharge {
@@ -99,6 +96,9 @@ class RawDecoder
9996
/// \return Reference to the list of decoding errors
10097
const std::vector<o2::cpv::RawDecoderError>& getErrors() const { return mErrors; }
10198

99+
/// \brief mute error reporting
100+
void muteErrors() { mIsMuteErrors = true; }
101+
102102
protected:
103103
/// \brief Read channels for the current event in the raw buffer
104104
RawErrorType_t readChannels();
@@ -111,9 +111,10 @@ class RawDecoder
111111
std::vector<uint32_t> mDigits; ///< vector of channels and BCs in the raw stream
112112
std::vector<o2::cpv::BCRecord> mBCRecords; ///< vector of bc references to digits
113113
std::vector<RawDecoderError> mErrors; ///< vector of decoding errors
114-
bool mChannelsInitialized = false; ///< check whether the channels are initialized
114+
bool mChannelsInitialized; ///< check whether the channels are initialized
115+
bool mIsMuteErrors; ///< mute errors
115116

116-
ClassDefNV(RawDecoder, 2);
117+
ClassDefNV(RawDecoder, 3);
117118
};
118119

119120
} // namespace cpv

Detectors/CPV/reconstruction/src/RawDecoder.cxx

Lines changed: 38 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,13 @@
1919
using namespace o2::cpv;
2020

2121
RawDecoder::RawDecoder(RawReaderMemory& reader) : mRawReader(reader),
22-
mChannelsInitialized(false)
22+
mChannelsInitialized(false),
23+
mIsMuteErrors(false)
2324
{
2425
}
2526

2627
RawErrorType_t RawDecoder::decode()
2728
{
28-
2929
auto& rdh = mRawReader.getRawHeader();
3030
short linkID = o2::raw::RDHUtils::getLinkID(rdh);
3131
mDigits.clear();
@@ -42,6 +42,12 @@ RawErrorType_t RawDecoder::decode()
4242
RawErrorType_t RawDecoder::readChannels()
4343
{
4444
mChannelsInitialized = false;
45+
// // test error
46+
// if (!mIsMuteErrors) {
47+
// LOG(error) << "RawDecoder::readChannels() : "
48+
// << "test error";
49+
// }
50+
// mErrors.emplace_back(-1, 0, 0, 0, kOK); //5 is non-existing link with general errors
4551

4652
auto& payloadWords = mRawReader.getPayload();
4753
uint32_t wordCountFromLastHeader = 1; //header word is included
@@ -59,20 +65,24 @@ RawErrorType_t RawDecoder::readChannels()
5965
<< "I read cpv header for orbit = " << header.orbit()
6066
<< " and BC = " << header.bc();
6167
if (!isHeaderExpected) { //actually, header was not expected
62-
LOG(error) << "RawDecoder::readChannels() : "
63-
<< "header was not expected";
68+
if (!mIsMuteErrors) {
69+
LOG(error) << "RawDecoder::readChannels() : "
70+
<< "header was not expected";
71+
}
6472
removeLastNDigits(nDigitsAddedFromLastHeader); //remove previously added digits as they are bad
65-
mErrors.emplace_back(5, 0, 0, 0, kNO_CPVTRAILER);
73+
mErrors.emplace_back(-1, 0, 0, 0, kNO_CPVTRAILER);
6674
}
6775
skipUntilNextHeader = false;
6876
currentBC = header.bc();
6977
wordCountFromLastHeader = 0;
7078
nDigitsAddedFromLastHeader = 0;
7179
if (currentOrbit != header.orbit()) { //bad cpvheader
72-
LOG(error) << "RawDecoder::readChannels() : "
73-
<< "currentOrbit(=" << currentOrbit
74-
<< ") != header.orbit()(=" << header.orbit() << ")";
75-
mErrors.emplace_back(5, 0, 0, 0, kCPVHEADER_INVALID); //5 is non-existing link with general errors
80+
if (!mIsMuteErrors) {
81+
LOG(error) << "RawDecoder::readChannels() : "
82+
<< "currentOrbit(=" << currentOrbit
83+
<< ") != header.orbit()(=" << header.orbit() << ")";
84+
}
85+
mErrors.emplace_back(-1, 0, 0, 0, kCPVHEADER_INVALID); //5 is non-existing link with general errors
7686
skipUntilNextHeader = true;
7787
}
7888
} else {
@@ -89,8 +99,10 @@ RawErrorType_t RawDecoder::readChannels()
8999
if (addDigit(pw.mDataWord, word.ccId(), currentBC)) {
90100
nDigitsAddedFromLastHeader++;
91101
} else {
92-
LOG(debug) << "RawDecoder::readChannels() : "
93-
<< "read pad word with non-valid pad address";
102+
if (!mIsMuteErrors) {
103+
LOG(debug) << "RawDecoder::readChannels() : "
104+
<< "read pad word with non-valid pad address";
105+
}
94106
unsigned int dil = pw.dil, gas = pw.gas, address = pw.address;
95107
mErrors.emplace_back(word.ccId(), dil, gas, address, kPadAddress);
96108
}
@@ -103,28 +115,34 @@ RawErrorType_t RawDecoder::readChannels()
103115
if (diffInCount > 1 ||
104116
diffInCount < -1) {
105117
//some words lost?
106-
LOG(error) << "RawDecoder::readChannels() : "
107-
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
108-
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
118+
if (!mIsMuteErrors) {
119+
LOG(error) << "RawDecoder::readChannels() : "
120+
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
121+
}
122+
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
109123
//throw all previous data and go to next header
110124
removeLastNDigits(nDigitsAddedFromLastHeader);
111125
skipUntilNextHeader = true;
112126
}
113127
if (trailer.bc() != currentBC) {
114128
//trailer does not fit header
115-
LOG(error) << "RawDecoder::readChannels() : "
116-
<< "CPVHeader BC is " << currentBC << " but CPVTrailer BC is " << trailer.bc();
117-
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
129+
if (!mIsMuteErrors) {
130+
LOG(error) << "RawDecoder::readChannels() : "
131+
<< "CPVHeader BC(" << currentBC << ") != CPVTrailer BC(" << trailer.bc() << ")";
132+
}
133+
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
118134
removeLastNDigits(nDigitsAddedFromLastHeader);
119135
skipUntilNextHeader = true;
120136
}
121137
isHeaderExpected = true;
122138
} else {
123139
wordCountFromLastHeader++;
124140
//error
125-
LOG(error) << "RawDecoder::readChannels() : "
126-
<< "Read unknown word";
127-
mErrors.emplace_back(5, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
141+
if (!mIsMuteErrors) {
142+
LOG(error) << "RawDecoder::readChannels() : "
143+
<< "Read unknown word";
144+
}
145+
mErrors.emplace_back(-1, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
128146
//what to do?
129147
}
130148
}

Detectors/CPV/workflow/include/CPVWorkflow/RawToDigitConverterSpec.h

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
// or submit itself to any jurisdiction.
1111

1212
#include <vector>
13-
13+
#include <chrono>
1414
#include "Framework/DataProcessorSpec.h"
1515
#include "Framework/Task.h"
1616
#include "Framework/ConcreteDataMatcher.h"
@@ -68,12 +68,18 @@ class RawToDigitConverterSpec : public framework::Task
6868
char CheckHWAddress(short ddl, short hwAddress, short& fee);
6969

7070
private:
71-
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
72-
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
73-
bool mIsPedestalData; ///< Do not subtract pedestals if true
74-
std::vector<Digit> mOutputDigits; ///< Container with output cells
75-
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output cells
76-
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occured in reading data
71+
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
72+
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
73+
bool mIsPedestalData; ///< Do not subtract pedestals if true
74+
std::vector<Digit> mOutputDigits; ///< Container with output cells
75+
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output cells
76+
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occured in reading data
77+
bool mIsMuteDecoderErrors = false; ///< mute errors for 10 minutes
78+
int mDecoderErrorsCounterWhenMuted = 0; ///< errors counter while errors are muted
79+
int mDecoderErrorsPerMinute = 0; ///< errors per minute counter
80+
int mMinutesPassed = 0; ///< runtime duration in minutes
81+
std::chrono::time_point<std::chrono::system_clock> mStartTime; ///< Time of start of decoding
82+
std::chrono::time_point<std::chrono::system_clock> mTimeWhenMuted; ///< Time when muted errors
7783
};
7884

7985
/// \brief Creating DataProcessorSpec for the CPV Digit Converter Spec

Detectors/CPV/workflow/src/RawToDigitConverterSpec.cxx

Lines changed: 52 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,10 @@ using Lifetime = o2::framework::Lifetime;
3535

3636
void RawToDigitConverterSpec::init(framework::InitContext& ctx)
3737
{
38+
mStartTime = std::chrono::system_clock::now();
39+
mDecoderErrorsPerMinute = 0;
40+
mIsMuteDecoderErrors = false;
41+
3842
LOG(debug) << "Initializing RawToDigitConverterSpec...";
3943
// Pedestal flag true/false
4044
LOG(info) << "Pedestal run: " << (mIsPedestalData ? "YES" : "NO");
@@ -56,6 +60,23 @@ void RawToDigitConverterSpec::init(framework::InitContext& ctx)
5660

5761
void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
5862
{
63+
// check timers if we need mute/unmute error reporting
64+
auto now = std::chrono::system_clock::now();
65+
if (mIsMuteDecoderErrors) { // check if 10-minutes muting period passed
66+
if (((now - mTimeWhenMuted) / std::chrono::minutes(1)) >= 10) {
67+
mIsMuteDecoderErrors = false; //unmute
68+
if (mDecoderErrorsCounterWhenMuted) {
69+
LOG(error) << "RawToDigitConverterSpec::run() : " << mDecoderErrorsCounterWhenMuted << " errors happened while it was muted ((";
70+
}
71+
mDecoderErrorsCounterWhenMuted = 0;
72+
}
73+
}
74+
if (((now - mStartTime) / std::chrono::minutes(1)) > mMinutesPassed) {
75+
mMinutesPassed = (now - mStartTime) / std::chrono::minutes(1);
76+
LOG(debug) << "minutes passed: " << mMinutesPassed;
77+
mDecoderErrorsPerMinute = 0;
78+
}
79+
5980
// Cache digits from bunch crossings as the component reads timeframes from many links consecutively
6081
std::map<o2::InteractionRecord, std::shared_ptr<std::vector<o2::cpv::Digit>>> digitBuffer; // Internal digit buffer
6182
int firstEntry = 0;
@@ -120,11 +141,13 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
120141
try {
121142
rawreader.next();
122143
} catch (RawErrorType_t e) {
123-
LOG(error) << "Raw decoding error " << (int)e;
144+
if (!mIsMuteDecoderErrors) {
145+
LOG(error) << "Raw decoding error " << (int)e;
146+
}
124147
//add error list
125148
//RawErrorType_t is defined in O2/Detectors/CPV/reconstruction/include/CPVReconstruction/RawReaderMemory.h
126149
//RawDecoderError(short c, short d, short g, short p, RawErrorType_t e)
127-
mOutputHWErrors.emplace_back(25, 0, 0, 0, e); //Put general errors to non-existing ccId 25
150+
mOutputHWErrors.emplace_back(-1, 0, 0, 0, e); //Put general errors to non-existing ccId -1
128151
//if problem in header, abandon this page
129152
if (e == RawErrorType_t::kRDH_DECODING) {
130153
LOG(error) << "RDH decoding error. Skipping this TF";
@@ -139,17 +162,40 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
139162
auto mod = o2::raw::RDHUtils::getLinkID(rdh) + 2; //link=0,1,2 -> mod=2,3,4
140163
//for now all modules are written to one LinkID
141164
if (mod > o2::cpv::Geometry::kNMod || mod < 2) { //only 3 correct modules:2,3,4
142-
LOG(error) << "module=" << mod << "do not exist";
143-
mOutputHWErrors.emplace_back(25, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId 25 and dilogic = mod
144-
continue; //skip STU mod
165+
if (!mIsMuteDecoderErrors) {
166+
LOG(error) << "RDH linkId corresponds to module " << mod << " which does not exist";
167+
}
168+
mOutputHWErrors.emplace_back(-1, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId -1 and dilogic = mod
169+
continue;
145170
}
146171
o2::cpv::RawDecoder decoder(rawreader);
172+
if (mIsMuteDecoderErrors) {
173+
decoder.muteErrors();
174+
}
147175
RawErrorType_t err = decoder.decode();
176+
int decoderErrors = 0;
177+
for (auto errs : decoder.getErrors()) {
178+
if (errs.ccId == -1) { // error related to wrong data format
179+
decoderErrors++;
180+
}
181+
}
182+
mDecoderErrorsPerMinute += decoderErrors;
183+
// LOG(debug) << "RawDecoder found " << decoderErrors << " raw format errors";
184+
// LOG(debug) << "Now I have " << mDecoderErrorsPerMinute << " errors for current minute";
185+
if (mIsMuteDecoderErrors) {
186+
mDecoderErrorsCounterWhenMuted += decoder.getErrors().size();
187+
} else {
188+
if (mDecoderErrorsPerMinute > 10) { // mute error reporting for 10 minutes
189+
LOG(warning) << "> 10 raw decoder error messages per minute, muting it for 10 minutes";
190+
mIsMuteDecoderErrors = true;
191+
mTimeWhenMuted = std::chrono::system_clock::now();
192+
}
193+
}
148194

149195
if (!(err == kOK || err == kOK_NO_PAYLOAD)) {
150196
//TODO handle severe errors
151197
//TODO: probably careful conversion of decoder errors to Fitter errors?
152-
mOutputHWErrors.emplace_back(25, mod, 0, 0, err); //assign general RDH errors to non-existing ccId 25 and dilogic = mod
198+
mOutputHWErrors.emplace_back(-1, mod, 0, 0, err); //assign general RDH errors to non-existing ccId -1 and dilogic = mod
153199
}
154200

155201
std::shared_ptr<std::vector<o2::cpv::Digit>> currentDigitContainer;

0 commit comments

Comments
 (0)