| 1 | // Copyright (C) 2021 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
| 3 | |
| 4 | #include "quoter.h" |
| 5 | |
| 6 | #include <QtCore/qdebug.h> |
| 7 | #include <QtCore/qfileinfo.h> |
| 8 | #include <QtCore/qregularexpression.h> |
| 9 | |
| 10 | QT_BEGIN_NAMESPACE |
| 11 | |
| 12 | using namespace Qt::StringLiterals; |
| 13 | |
| 14 | QHash<QString, QString> Quoter::; |
| 15 | |
| 16 | static void replaceMultipleNewlines(QString &s) |
| 17 | { |
| 18 | const qsizetype n = s.size(); |
| 19 | bool slurping = false; |
| 20 | int j = -1; |
| 21 | const QChar newLine = QLatin1Char('\n'); |
| 22 | QChar *d = s.data(); |
| 23 | for (int i = 0; i != n; ++i) { |
| 24 | const QChar c = d[i]; |
| 25 | bool hit = (c == newLine); |
| 26 | if (slurping && hit) |
| 27 | continue; |
| 28 | d[++j] = c; |
| 29 | slurping = hit; |
| 30 | } |
| 31 | s.resize(size: ++j); |
| 32 | } |
| 33 | |
| 34 | // This is equivalent to line.split( QRegularExpression("\n(?!\n|$)") ) but much faster |
| 35 | QStringList Quoter::splitLines(const QString &line) |
| 36 | { |
| 37 | QStringList result; |
| 38 | qsizetype i = line.size(); |
| 39 | while (true) { |
| 40 | qsizetype j = i - 1; |
| 41 | while (j >= 0 && line.at(i: j) == QLatin1Char('\n')) |
| 42 | --j; |
| 43 | while (j >= 0 && line.at(i: j) != QLatin1Char('\n')) |
| 44 | --j; |
| 45 | result.prepend(t: line.mid(position: j + 1, n: i - j - 1)); |
| 46 | if (j < 0) |
| 47 | break; |
| 48 | i = j; |
| 49 | } |
| 50 | return result; |
| 51 | } |
| 52 | |
| 53 | /* |
| 54 | Transforms 'int x = 3 + 4' into 'int x=3+4'. A white space is kept |
| 55 | between 'int' and 'x' because it is meaningful in C++. |
| 56 | */ |
| 57 | static void trimWhiteSpace(QString &str) |
| 58 | { |
| 59 | enum { Normal, MetAlnum, MetSpace } state = Normal; |
| 60 | const qsizetype n = str.size(); |
| 61 | |
| 62 | int j = -1; |
| 63 | QChar *d = str.data(); |
| 64 | for (int i = 0; i != n; ++i) { |
| 65 | const QChar c = d[i]; |
| 66 | if (c.isLetterOrNumber()) { |
| 67 | if (state == Normal) { |
| 68 | state = MetAlnum; |
| 69 | } else { |
| 70 | if (state == MetSpace) |
| 71 | str[++j] = c; |
| 72 | state = Normal; |
| 73 | } |
| 74 | str[++j] = c; |
| 75 | } else if (c.isSpace()) { |
| 76 | if (state == MetAlnum) |
| 77 | state = MetSpace; |
| 78 | } else { |
| 79 | state = Normal; |
| 80 | str[++j] = c; |
| 81 | } |
| 82 | } |
| 83 | str.resize(size: ++j); |
| 84 | } |
| 85 | |
| 86 | Quoter::Quoter() : m_silent(false) |
| 87 | { |
| 88 | /* We're going to hard code these delimiters: |
| 89 | * C++, Qt, Qt Script, Java: |
| 90 | //! [<id>] |
| 91 | * .pro, .py, CMake files: |
| 92 | #! [<id>] |
| 93 | * .html, .qrc, .ui, .xq, .xml files: |
| 94 | <!-- [<id>] --> |
| 95 | */ |
| 96 | if (s_commentHash.empty()) { |
| 97 | s_commentHash["pro" ] = "#!" ; |
| 98 | s_commentHash["py" ] = "#!" ; |
| 99 | s_commentHash["cmake" ] = "#!" ; |
| 100 | s_commentHash["html" ] = "<!--" ; |
| 101 | s_commentHash["qrc" ] = "<!--" ; |
| 102 | s_commentHash["ui" ] = "<!--" ; |
| 103 | s_commentHash["xml" ] = "<!--" ; |
| 104 | s_commentHash["xq" ] = "<!--" ; |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | void Quoter::reset() |
| 109 | { |
| 110 | m_silent = false; |
| 111 | m_plainLines.clear(); |
| 112 | m_markedLines.clear(); |
| 113 | m_codeLocation = Location(); |
| 114 | } |
| 115 | |
| 116 | void Quoter::quoteFromFile(const QString &userFriendlyFilePath, const QString &plainCode, |
| 117 | const QString &markedCode) |
| 118 | { |
| 119 | m_silent = false; |
| 120 | |
| 121 | /* |
| 122 | Split the source code into logical lines. Empty lines are |
| 123 | treated specially. Before: |
| 124 | |
| 125 | p->alpha(); |
| 126 | p->beta(); |
| 127 | |
| 128 | p->gamma(); |
| 129 | |
| 130 | |
| 131 | p->delta(); |
| 132 | |
| 133 | After: |
| 134 | |
| 135 | p->alpha(); |
| 136 | p->beta();\n |
| 137 | p->gamma();\n\n |
| 138 | p->delta(); |
| 139 | |
| 140 | Newlines are preserved because they affect codeLocation. |
| 141 | */ |
| 142 | m_codeLocation = Location(userFriendlyFilePath); |
| 143 | |
| 144 | m_plainLines = splitLines(line: plainCode); |
| 145 | m_markedLines = splitLines(line: markedCode); |
| 146 | if (m_markedLines.size() != m_plainLines.size()) { |
| 147 | m_codeLocation.warning( |
| 148 | QStringLiteral("Something is wrong with qdoc's handling of marked code" )); |
| 149 | m_markedLines = m_plainLines; |
| 150 | } |
| 151 | |
| 152 | /* |
| 153 | Squeeze blanks (cat -s). |
| 154 | */ |
| 155 | for (auto &line : m_markedLines) |
| 156 | replaceMultipleNewlines(s&: line); |
| 157 | m_codeLocation.start(); |
| 158 | } |
| 159 | |
| 160 | QString Quoter::quoteLine(const Location &docLocation, const QString &command, |
| 161 | const QString &pattern) |
| 162 | { |
| 163 | if (m_plainLines.isEmpty()) { |
| 164 | failedAtEnd(docLocation, command); |
| 165 | return QString(); |
| 166 | } |
| 167 | |
| 168 | if (pattern.isEmpty()) { |
| 169 | docLocation.warning(QStringLiteral("Missing pattern after '\\%1'" ).arg(a: command)); |
| 170 | return QString(); |
| 171 | } |
| 172 | |
| 173 | if (match(docLocation, pattern, line: m_plainLines.first())) |
| 174 | return getLine(); |
| 175 | |
| 176 | if (!m_silent) { |
| 177 | docLocation.warning(QStringLiteral("Command '\\%1' failed" ).arg(a: command)); |
| 178 | m_codeLocation.warning(QStringLiteral("Pattern '%1' didn't match here" ).arg(a: pattern)); |
| 179 | m_silent = true; |
| 180 | } |
| 181 | return QString(); |
| 182 | } |
| 183 | |
| 184 | /*! |
| 185 | Calculate the number of leading space characters in \a line. |
| 186 | This function only counts space characters, not tabs or other whitespace. |
| 187 | */ |
| 188 | int Quoter::calculateIndentation(const QString &line) const |
| 189 | { |
| 190 | int indent = 0; |
| 191 | while (indent < line.size() && line[indent] == ' '_L1) |
| 192 | ++indent; |
| 193 | return indent; |
| 194 | } |
| 195 | |
| 196 | Quoter::SnippetIndentation Quoter::analyzeContentIndentation(const Location &docLocation, const QString &delimiter) |
| 197 | { |
| 198 | SnippetIndentation result; |
| 199 | const QString = commentForCode(); |
| 200 | |
| 201 | for (const QString &line : m_plainLines) { |
| 202 | if (match(docLocation, pattern: delimiter, line)) |
| 203 | break; // Found end delimiter |
| 204 | |
| 205 | const QString trimmed = line.trimmed(); |
| 206 | if (trimmed.isEmpty() || |
| 207 | trimmed.startsWith(s: "QT_BEGIN_NAMESPACE"_L1 ) || |
| 208 | trimmed.startsWith(s: "QT_END_NAMESPACE"_L1 ) || |
| 209 | trimmed.startsWith(s: comment)) { |
| 210 | continue; |
| 211 | } |
| 212 | |
| 213 | result.hasNonEmptyContent = true; |
| 214 | result.minContentIndent = qMin(a: result.minContentIndent, b: calculateIndentation(line)); |
| 215 | } |
| 216 | return result; |
| 217 | } |
| 218 | |
| 219 | |
| 220 | QString Quoter::quoteSnippet(const Location &docLocation, const QString &identifier) |
| 221 | { |
| 222 | QString = commentForCode(); |
| 223 | QString delimiter = comment + QString(" [%1]" ).arg(a: identifier); |
| 224 | QString snippetContent; |
| 225 | int markerIndent = 0; |
| 226 | |
| 227 | // Find start delimiter and get its indentation |
| 228 | while (!m_plainLines.isEmpty()) { |
| 229 | if (match(docLocation, pattern: delimiter, line: m_plainLines.first())) { |
| 230 | markerIndent = calculateIndentation(line: m_plainLines.first()); |
| 231 | getLine(); |
| 232 | break; |
| 233 | } |
| 234 | getLine(); |
| 235 | } |
| 236 | |
| 237 | const auto indentationInfo = analyzeContentIndentation(docLocation, delimiter); |
| 238 | const int unindent = |
| 239 | indentationInfo.hasNonEmptyContent |
| 240 | ? qMin(a: markerIndent, b: indentationInfo.minContentIndent) |
| 241 | : markerIndent; |
| 242 | |
| 243 | while (!m_plainLines.isEmpty()) { |
| 244 | QString line = m_plainLines.first(); |
| 245 | if (match(docLocation, pattern: delimiter, line)) { |
| 246 | QString lastLine = getLine(unindent); |
| 247 | qsizetype dIndex = lastLine.indexOf(s: delimiter); |
| 248 | if (dIndex > 0) { |
| 249 | // The delimiter might be preceded on the line by other |
| 250 | // delimeters, so look for the first comment on the line. |
| 251 | QString leading = lastLine.left(n: dIndex); |
| 252 | dIndex = leading.indexOf(s: comment); |
| 253 | if (dIndex != -1) |
| 254 | leading = leading.left(n: dIndex); |
| 255 | if (leading.endsWith(s: QLatin1String("<@comment>" ))) |
| 256 | leading.chop(n: 10); |
| 257 | if (!leading.trimmed().isEmpty()) |
| 258 | snippetContent += leading; |
| 259 | } |
| 260 | return snippetContent; |
| 261 | } |
| 262 | snippetContent += removeSpecialLines(line, comment, unindent); |
| 263 | } |
| 264 | |
| 265 | failedAtEnd(docLocation, command: QString("snippet (%1)" ).arg(a: delimiter)); |
| 266 | return snippetContent; |
| 267 | } |
| 268 | |
| 269 | QString Quoter::quoteTo(const Location &docLocation, const QString &command, const QString &pattern) |
| 270 | { |
| 271 | QString t; |
| 272 | QString = commentForCode(); |
| 273 | |
| 274 | if (pattern.isEmpty()) { |
| 275 | while (!m_plainLines.isEmpty()) { |
| 276 | QString line = m_plainLines.first(); |
| 277 | t += removeSpecialLines(line, comment); |
| 278 | } |
| 279 | } else { |
| 280 | while (!m_plainLines.isEmpty()) { |
| 281 | if (match(docLocation, pattern, line: m_plainLines.first())) { |
| 282 | return t; |
| 283 | } |
| 284 | t += getLine(); |
| 285 | } |
| 286 | failedAtEnd(docLocation, command); |
| 287 | } |
| 288 | return t; |
| 289 | } |
| 290 | |
| 291 | QString Quoter::quoteUntil(const Location &docLocation, const QString &command, |
| 292 | const QString &pattern) |
| 293 | { |
| 294 | QString t = quoteTo(docLocation, command, pattern); |
| 295 | t += getLine(); |
| 296 | return t; |
| 297 | } |
| 298 | |
| 299 | /*! |
| 300 | Retrieves and processes the next line from the snippet source. |
| 301 | |
| 302 | This function consumes the first line from both m_plainLines and |
| 303 | m_markedLines, applies indentation removal based on the \a unindent |
| 304 | parameter, appends a newline character, and updates the current location |
| 305 | tracking. |
| 306 | |
| 307 | The \a unindent parameter specifies how many leading spaces to remove from |
| 308 | the line. This is used to normalize indentation in extracted snippets so |
| 309 | that the generated output maintains proper relative indentation. The default |
| 310 | value is \c 0. |
| 311 | |
| 312 | Returns the processed line with specified indentation removed and a trailing |
| 313 | newline, or an empty string if no more lines are available. |
| 314 | |
| 315 | \note This function modifies the internal state by consuming lines from both |
| 316 | m_plainLines and m_markedLines, and advances the current code |
| 317 | location. |
| 318 | */ |
| 319 | QString Quoter::getLine(int unindent) |
| 320 | { |
| 321 | if (m_plainLines.isEmpty()) |
| 322 | return QString(); |
| 323 | |
| 324 | m_plainLines.removeFirst(); |
| 325 | |
| 326 | QString t = m_markedLines.takeFirst(); |
| 327 | int i = 0; |
| 328 | while (i < unindent && i < t.size() && t[i] == QLatin1Char(' ')) |
| 329 | i++; |
| 330 | |
| 331 | t = t.mid(position: i); |
| 332 | t += QLatin1Char('\n'); |
| 333 | m_codeLocation.advanceLines(n: t.count(c: QLatin1Char('\n'))); |
| 334 | return t; |
| 335 | } |
| 336 | |
| 337 | bool Quoter::match(const Location &docLocation, const QString &pattern0, const QString &line) |
| 338 | { |
| 339 | QString str = line; |
| 340 | while (str.endsWith(c: QLatin1Char('\n'))) |
| 341 | str.truncate(pos: str.size() - 1); |
| 342 | |
| 343 | QString pattern = pattern0; |
| 344 | if (pattern.startsWith(c: QLatin1Char('/')) && pattern.endsWith(c: QLatin1Char('/')) |
| 345 | && pattern.size() > 2) { |
| 346 | QRegularExpression rx(pattern.mid(position: 1, n: pattern.size() - 2)); |
| 347 | if (!m_silent && !rx.isValid()) { |
| 348 | docLocation.warning( |
| 349 | QStringLiteral("Invalid regular expression '%1'" ).arg(a: rx.pattern())); |
| 350 | m_silent = true; |
| 351 | } |
| 352 | return str.indexOf(re: rx) != -1; |
| 353 | } |
| 354 | trimWhiteSpace(str); |
| 355 | trimWhiteSpace(str&: pattern); |
| 356 | return str.indexOf(s: pattern) != -1; |
| 357 | } |
| 358 | |
| 359 | void Quoter::failedAtEnd(const Location &docLocation, const QString &command) |
| 360 | { |
| 361 | if (!m_silent && !command.isEmpty()) { |
| 362 | if (m_codeLocation.filePath().isEmpty()) { |
| 363 | docLocation.warning(QStringLiteral("Unexpected '\\%1'" ).arg(a: command)); |
| 364 | } else { |
| 365 | docLocation.warning(QStringLiteral("Command '\\%1' failed at end of file '%2'" ) |
| 366 | .arg(args: command, args: m_codeLocation.filePath())); |
| 367 | } |
| 368 | m_silent = true; |
| 369 | } |
| 370 | } |
| 371 | |
| 372 | QString Quoter::() const |
| 373 | { |
| 374 | QFileInfo fi = QFileInfo(m_codeLocation.fileName()); |
| 375 | if (fi.fileName() == "CMakeLists.txt" ) |
| 376 | return "#!" ; |
| 377 | return s_commentHash.value(key: fi.suffix(), defaultValue: "//!" ); |
| 378 | } |
| 379 | |
| 380 | QString Quoter::removeSpecialLines(const QString &line, const QString &, int unindent) |
| 381 | { |
| 382 | QString t; |
| 383 | |
| 384 | // Remove special macros to support Qt namespacing. |
| 385 | QString trimmed = line.trimmed(); |
| 386 | if (trimmed.startsWith(s: "QT_BEGIN_NAMESPACE" )) { |
| 387 | getLine(); |
| 388 | } else if (trimmed.startsWith(s: "QT_END_NAMESPACE" )) { |
| 389 | getLine(); |
| 390 | t += QLatin1Char('\n'); |
| 391 | } else if (!trimmed.startsWith(s: comment)) { |
| 392 | // Ordinary code |
| 393 | t += getLine(unindent); |
| 394 | } else { |
| 395 | // Comments |
| 396 | if (line.contains(c: QLatin1Char('\n'))) |
| 397 | t += QLatin1Char('\n'); |
| 398 | getLine(); |
| 399 | } |
| 400 | return t; |
| 401 | } |
| 402 | |
| 403 | QT_END_NAMESPACE |
| 404 | |