Skip to content

Commit e39fb00

Browse files
mzeren-vmwcdunn2001
authored andcommitted
Normalize comment EOLs while reading instead of while writing
Tests are currently failing when git cloning on Windows with autocrlf = true. In that setup multiline comments contain \r\n EOLs. The test code assumes that comments contain \n EOLs and opens the .actual files (etc.) with "wt" which converts \n to \r\n. Thus we end up with \r\r\n EOLs in the output, which triggers a test failure. Instead we should cannonicalize comments while reading so that they contain only \n EOLs. This approach simplifies other parts of the reader and writer logic, and requires no changes to the test. It is a breaking change, but probably the Right Thing going forward. This change also fixes dereferencing past the end of the comment string in StyledWriter::writeCommentBeforeValue. Tests should be added with appropriate .gitattributes for the input files to ensure that we run tests for DOS, Mac, and Unix EOL files on all platforms. For now this change is enough to unblock Windows builds. issue open-source-parsers#116
1 parent ec727e2 commit e39fb00

2 files changed

Lines changed: 44 additions & 59 deletions

File tree

src/lib_json/json_reader.cpp

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,9 @@ bool Reader::readValue() {
135135
bool successful = true;
136136

137137
if (collectComments_ && !commentsBefore_.empty()) {
138-
// Remove newline characters at the end of the comments
139-
size_t lastNonNewline = commentsBefore_.find_last_not_of("\r\n");
140-
if (lastNonNewline != std::string::npos) {
141-
commentsBefore_.erase(lastNonNewline + 1);
142-
} else {
143-
commentsBefore_.clear();
144-
}
145-
138+
// Remove newline at the end of the comment
139+
if (commentsBefore_[commentsBefore_.size() - 1] == '\n')
140+
commentsBefore_.resize(commentsBefore_.size() - 1);
146141
currentValue().setComment(commentsBefore_, commentBefore);
147142
commentsBefore_ = "";
148143
}
@@ -337,14 +332,34 @@ bool Reader::readComment() {
337332
return true;
338333
}
339334

335+
static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
336+
std::string normalized;
337+
normalized.reserve(end - begin);
338+
Reader::Location current = begin;
339+
while (current != end) {
340+
char c = *current++;
341+
if (c == '\r') {
342+
if (current != end && *current == '\n')
343+
// convert dos EOL
344+
++current;
345+
// convert Mac EOL
346+
normalized += '\n';
347+
} else {
348+
normalized += c;
349+
}
350+
}
351+
return normalized;
352+
}
353+
340354
void
341355
Reader::addComment(Location begin, Location end, CommentPlacement placement) {
342356
assert(collectComments_);
357+
const std::string& normalized = normalizeEOL(begin, end);
343358
if (placement == commentAfterOnSameLine) {
344359
assert(lastValue_ != 0);
345-
lastValue_->setComment(std::string(begin, end), placement);
360+
lastValue_->setComment(normalized, placement);
346361
} else {
347-
commentsBefore_ += std::string(begin, end);
362+
commentsBefore_ += normalized;
348363
}
349364
}
350365

@@ -360,8 +375,15 @@ bool Reader::readCStyleComment() {
360375
bool Reader::readCppStyleComment() {
361376
while (current_ != end_) {
362377
Char c = getNextChar();
363-
if (c == '\r' || c == '\n')
378+
if (c == '\n')
364379
break;
380+
if (c == '\r') {
381+
// Consume DOS EOL. It will be normalized in addComment.
382+
if (current_ != end_ && *current_ == '\n')
383+
getNextChar();
384+
// Break on Moc OS 9 EOL.
385+
break;
386+
}
365387
}
366388
return true;
367389
}

src/lib_json/json_writer.cpp

Lines changed: 11 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -421,26 +421,27 @@ void StyledWriter::writeCommentBeforeValue(const Value& root) {
421421

422422
document_ += "\n";
423423
writeIndent();
424-
std::string normalizedComment = normalizeEOL(root.getComment(commentBefore));
425-
std::string::const_iterator iter = normalizedComment.begin();
426-
while (iter != normalizedComment.end()) {
424+
const std::string& comment = root.getComment(commentBefore);
425+
std::string::const_iterator iter = comment.begin();
426+
while (iter != comment.end()) {
427427
document_ += *iter;
428-
if (*iter == '\n' && *(iter + 1) == '/')
428+
if (*iter == '\n' &&
429+
(iter != comment.end() && *(iter + 1) == '/'))
429430
writeIndent();
430431
++iter;
431432
}
432433

433-
// Comments are stripped of newlines, so add one here
434+
// Comments are stripped of trailing newlines, so add one here
434435
document_ += "\n";
435436
}
436437

437438
void StyledWriter::writeCommentAfterValueOnSameLine(const Value& root) {
438439
if (root.hasComment(commentAfterOnSameLine))
439-
document_ += " " + normalizeEOL(root.getComment(commentAfterOnSameLine));
440+
document_ += " " + root.getComment(commentAfterOnSameLine);
440441

441442
if (root.hasComment(commentAfter)) {
442443
document_ += "\n";
443-
document_ += normalizeEOL(root.getComment(commentAfter));
444+
document_ += root.getComment(commentAfter);
444445
document_ += "\n";
445446
}
446447
}
@@ -451,25 +452,6 @@ bool StyledWriter::hasCommentForValue(const Value& value) {
451452
value.hasComment(commentAfter);
452453
}
453454

454-
std::string StyledWriter::normalizeEOL(const std::string& text) {
455-
std::string normalized;
456-
normalized.reserve(text.length());
457-
const char* begin = text.c_str();
458-
const char* end = begin + text.length();
459-
const char* current = begin;
460-
while (current != end) {
461-
char c = *current++;
462-
if (c == '\r') // mac or dos EOL
463-
{
464-
if (*current == '\n') // convert dos EOL
465-
++current;
466-
normalized += '\n';
467-
} else // handle unix EOL & other char
468-
normalized += c;
469-
}
470-
return normalized;
471-
}
472-
473455
// Class StyledStreamWriter
474456
// //////////////////////////////////////////////////////////////////
475457

@@ -646,17 +628,17 @@ void StyledStreamWriter::unindent() {
646628
void StyledStreamWriter::writeCommentBeforeValue(const Value& root) {
647629
if (!root.hasComment(commentBefore))
648630
return;
649-
*document_ << normalizeEOL(root.getComment(commentBefore));
631+
*document_ << root.getComment(commentBefore);
650632
*document_ << "\n";
651633
}
652634

653635
void StyledStreamWriter::writeCommentAfterValueOnSameLine(const Value& root) {
654636
if (root.hasComment(commentAfterOnSameLine))
655-
*document_ << " " + normalizeEOL(root.getComment(commentAfterOnSameLine));
637+
*document_ << " " + root.getComment(commentAfterOnSameLine);
656638

657639
if (root.hasComment(commentAfter)) {
658640
*document_ << "\n";
659-
*document_ << normalizeEOL(root.getComment(commentAfter));
641+
*document_ << root.getComment(commentAfter);
660642
*document_ << "\n";
661643
}
662644
}
@@ -667,25 +649,6 @@ bool StyledStreamWriter::hasCommentForValue(const Value& value) {
667649
value.hasComment(commentAfter);
668650
}
669651

670-
std::string StyledStreamWriter::normalizeEOL(const std::string& text) {
671-
std::string normalized;
672-
normalized.reserve(text.length());
673-
const char* begin = text.c_str();
674-
const char* end = begin + text.length();
675-
const char* current = begin;
676-
while (current != end) {
677-
char c = *current++;
678-
if (c == '\r') // mac or dos EOL
679-
{
680-
if (*current == '\n') // convert dos EOL
681-
++current;
682-
normalized += '\n';
683-
} else // handle unix EOL & other char
684-
normalized += c;
685-
}
686-
return normalized;
687-
}
688-
689652
std::ostream& operator<<(std::ostream& sout, const Value& root) {
690653
Json::StyledStreamWriter writer;
691654
writer.write(sout, root);

0 commit comments

Comments
 (0)