| 1 | // Copyright (C) 2021 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
| 3 | |
| 4 | #include "text.h" |
| 5 | |
| 6 | #include <QtCore/qregularexpression.h> |
| 7 | |
| 8 | #include <cstdio> |
| 9 | |
| 10 | QT_BEGIN_NAMESPACE |
| 11 | |
| 12 | Text::Text() : m_first(nullptr), m_last(nullptr) { } |
| 13 | |
| 14 | Text::Text(const QString &str) : m_first(nullptr), m_last(nullptr) |
| 15 | { |
| 16 | operator<<(string: str); |
| 17 | } |
| 18 | |
| 19 | Text::Text(const Text &text) : m_first(nullptr), m_last(nullptr) |
| 20 | { |
| 21 | operator=(text); |
| 22 | } |
| 23 | |
| 24 | Text::~Text() |
| 25 | { |
| 26 | clear(); |
| 27 | } |
| 28 | |
| 29 | Text &Text::operator=(const Text &text) |
| 30 | { |
| 31 | if (this != &text) { |
| 32 | clear(); |
| 33 | operator<<(text); |
| 34 | } |
| 35 | return *this; |
| 36 | } |
| 37 | |
| 38 | Text &Text::operator<<(Atom::AtomType atomType) |
| 39 | { |
| 40 | return operator<<(atom: Atom(atomType)); |
| 41 | } |
| 42 | |
| 43 | Text &Text::operator<<(const QString &string) |
| 44 | { |
| 45 | return string.isEmpty() ? *this : operator<<(atom: Atom(Atom::String, string)); |
| 46 | } |
| 47 | |
| 48 | Text &Text::operator<<(const Atom &atom) |
| 49 | { |
| 50 | if (atom.count() < 2) { |
| 51 | if (m_first == nullptr) { |
| 52 | m_first = new Atom(atom.type(), atom.string()); |
| 53 | m_last = m_first; |
| 54 | } else |
| 55 | m_last = new Atom(m_last, atom.type(), atom.string()); |
| 56 | } else { |
| 57 | if (m_first == nullptr) { |
| 58 | m_first = new Atom(atom.type(), atom.string(), atom.string(i: 1)); |
| 59 | m_last = m_first; |
| 60 | } else |
| 61 | m_last = new Atom(m_last, atom.type(), atom.string(), atom.string(i: 1)); |
| 62 | } |
| 63 | return *this; |
| 64 | } |
| 65 | |
| 66 | /*! |
| 67 | Special output operator for LinkAtom. It makes a copy of |
| 68 | the LinkAtom \a atom and connects the cop;y to the list |
| 69 | in this Text. |
| 70 | */ |
| 71 | Text &Text::operator<<(const LinkAtom &atom) |
| 72 | { |
| 73 | if (m_first == nullptr) { |
| 74 | m_first = new LinkAtom(atom); |
| 75 | m_last = m_first; |
| 76 | } else |
| 77 | m_last = new LinkAtom(m_last, atom); |
| 78 | return *this; |
| 79 | } |
| 80 | |
| 81 | Text &Text::operator<<(const Text &text) |
| 82 | { |
| 83 | const Atom *atom = text.firstAtom(); |
| 84 | while (atom != nullptr) { |
| 85 | operator<<(atom: *atom); |
| 86 | atom = atom->next(); |
| 87 | } |
| 88 | return *this; |
| 89 | } |
| 90 | |
| 91 | void Text::stripFirstAtom() |
| 92 | { |
| 93 | if (m_first != nullptr) { |
| 94 | if (m_first == m_last) |
| 95 | m_last = nullptr; |
| 96 | Atom *oldFirst = m_first; |
| 97 | m_first = m_first->next(); |
| 98 | delete oldFirst; |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | void Text::stripLastAtom() |
| 103 | { |
| 104 | if (m_last != nullptr) { |
| 105 | Atom *oldLast = m_last; |
| 106 | if (m_first == m_last) { |
| 107 | m_first = nullptr; |
| 108 | m_last = nullptr; |
| 109 | } else { |
| 110 | m_last = m_first; |
| 111 | while (m_last->next() != oldLast) |
| 112 | m_last = m_last->next(); |
| 113 | m_last->setNext(nullptr); |
| 114 | } |
| 115 | delete oldLast; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | /*! |
| 120 | This function traverses the atom list of the Text object, |
| 121 | extracting all the string parts. It concatenates them to |
| 122 | a result string and returns it. |
| 123 | */ |
| 124 | QString Text::toString() const |
| 125 | { |
| 126 | QString str; |
| 127 | const Atom *atom = firstAtom(); |
| 128 | while (atom != nullptr) { |
| 129 | if (atom->type() == Atom::String || atom->type() == Atom::AutoLink |
| 130 | || atom->type() == Atom::C) |
| 131 | str += atom->string(); |
| 132 | atom = atom->next(); |
| 133 | } |
| 134 | return str; |
| 135 | } |
| 136 | |
| 137 | /*! |
| 138 | Returns true if this Text contains the substring \a str. |
| 139 | */ |
| 140 | bool Text::contains(const QString &str) const |
| 141 | { |
| 142 | const Atom *atom = firstAtom(); |
| 143 | while (atom != nullptr) { |
| 144 | if (atom->type() == Atom::String || atom->type() == Atom::AutoLink |
| 145 | || atom->type() == Atom::C) |
| 146 | if (atom->string().contains(s: str, cs: Qt::CaseInsensitive)) |
| 147 | return true; |
| 148 | atom = atom->next(); |
| 149 | } |
| 150 | return false; |
| 151 | } |
| 152 | |
| 153 | Text Text::subText(Atom::AtomType left, Atom::AtomType right, const Atom *from, |
| 154 | bool inclusive) const |
| 155 | { |
| 156 | const Atom *begin = from ? from : firstAtom(); |
| 157 | const Atom *end; |
| 158 | |
| 159 | while (begin != nullptr && begin->type() != left) |
| 160 | begin = begin->next(); |
| 161 | if (begin != nullptr) { |
| 162 | if (!inclusive) |
| 163 | begin = begin->next(); |
| 164 | } |
| 165 | |
| 166 | end = begin; |
| 167 | while (end != nullptr && end->type() != right) |
| 168 | end = end->next(); |
| 169 | if (end == nullptr) |
| 170 | begin = nullptr; |
| 171 | else if (inclusive) |
| 172 | end = end->next(); |
| 173 | return subText(begin, end); |
| 174 | } |
| 175 | |
| 176 | Text Text::sectionHeading(const Atom *sectionLeft) |
| 177 | { |
| 178 | if (sectionLeft != nullptr) { |
| 179 | const Atom *begin = sectionLeft; |
| 180 | while (begin != nullptr && begin->type() != Atom::SectionHeadingLeft) |
| 181 | begin = begin->next(); |
| 182 | if (begin != nullptr) |
| 183 | begin = begin->next(); |
| 184 | |
| 185 | const Atom *end = begin; |
| 186 | while (end != nullptr && end->type() != Atom::SectionHeadingRight) |
| 187 | end = end->next(); |
| 188 | |
| 189 | if (end != nullptr) |
| 190 | return subText(begin, end); |
| 191 | } |
| 192 | return Text(); |
| 193 | } |
| 194 | |
| 195 | /*! |
| 196 | Prints a human-readable version of the contained atoms to stderr. |
| 197 | |
| 198 | The output is formatted as a linear list of atoms, with each atom |
| 199 | being on its own line. |
| 200 | |
| 201 | Each atom is represented by its type and its stringified-contents, |
| 202 | if any, with a space between the two. |
| 203 | |
| 204 | Indentation is used to emphasize the possible block-level |
| 205 | relationship between consecutive atoms, increasing after a |
| 206 | "Left" atom and decreasing just before a "Right" atom. |
| 207 | |
| 208 | For example, if this `Text` represented the block-comment |
| 209 | containing the text: |
| 210 | |
| 211 | \c {\l {somelink} {This is a link}} |
| 212 | |
| 213 | Then the human-readable output would look like the following: |
| 214 | |
| 215 | \badcode |
| 216 | ParaLeft |
| 217 | Link "somelink" |
| 218 | FormattingLeft "link" |
| 219 | String "This is a link" |
| 220 | FormattingRight "link" |
| 221 | String |
| 222 | ParaRight |
| 223 | \endcode |
| 224 | */ |
| 225 | void Text::dump() const |
| 226 | { |
| 227 | constexpr int minimum_indentation_level { 1 }; |
| 228 | int indentation_level { minimum_indentation_level }; |
| 229 | int indentation_width { 4 }; |
| 230 | |
| 231 | const Atom *atom = firstAtom(); |
| 232 | while (atom != nullptr) { |
| 233 | QString str = atom->string(); |
| 234 | str.replace(before: "\\" , after: "\\\\" ); |
| 235 | str.replace(before: "\"" , after: "\\\"" ); |
| 236 | str.replace(before: "\n" , after: "\\n" ); |
| 237 | static const QRegularExpression re(R"([^ -~])" ); |
| 238 | str.replace(re, after: "?" ); |
| 239 | if (!str.isEmpty()) |
| 240 | str = " \"" + str + QLatin1Char('"'); |
| 241 | |
| 242 | QString atom_type = atom->typeString(); |
| 243 | if (atom_type.contains(s: "Right" )) |
| 244 | indentation_level = std::max(a: minimum_indentation_level, b: indentation_level - 1); |
| 245 | |
| 246 | fprintf(stderr, format: "%s%s%s\n" , |
| 247 | QString(indentation_level * indentation_width, ' ').toLatin1().data(), |
| 248 | atom_type.toLatin1().data(), str.toLatin1().data()); |
| 249 | |
| 250 | if (atom_type.contains(s: "Left" )) |
| 251 | indentation_level += 1; |
| 252 | |
| 253 | atom = atom->next(); |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | Text Text::subText(const Atom *begin, const Atom *end) |
| 258 | { |
| 259 | Text text; |
| 260 | if (begin != nullptr) { |
| 261 | while (begin != end) { |
| 262 | text << *begin; |
| 263 | begin = begin->next(); |
| 264 | } |
| 265 | } |
| 266 | return text; |
| 267 | } |
| 268 | |
| 269 | void Text::clear() |
| 270 | { |
| 271 | while (m_first != nullptr) { |
| 272 | Atom *atom = m_first; |
| 273 | m_first = m_first->next(); |
| 274 | delete atom; |
| 275 | } |
| 276 | m_first = nullptr; |
| 277 | m_last = nullptr; |
| 278 | } |
| 279 | |
| 280 | int Text::compare(const Text &text1, const Text &text2) |
| 281 | { |
| 282 | if (text1.isEmpty()) |
| 283 | return text2.isEmpty() ? 0 : -1; |
| 284 | if (text2.isEmpty()) |
| 285 | return 1; |
| 286 | |
| 287 | const Atom *atom1 = text1.firstAtom(); |
| 288 | const Atom *atom2 = text2.firstAtom(); |
| 289 | |
| 290 | for (;;) { |
| 291 | if (atom1->type() != atom2->type()) |
| 292 | return (int)atom1->type() - (int)atom2->type(); |
| 293 | int cmp = QString::compare(s1: atom1->string(), s2: atom2->string()); |
| 294 | if (cmp != 0) |
| 295 | return cmp; |
| 296 | |
| 297 | if (atom1 == text1.lastAtom()) |
| 298 | return atom2 == text2.lastAtom() ? 0 : -1; |
| 299 | if (atom2 == text2.lastAtom()) |
| 300 | return 1; |
| 301 | atom1 = atom1->next(); |
| 302 | atom2 = atom2->next(); |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | /*! |
| 307 | \internal |
| 308 | |
| 309 | \brief Splits the current Text from \a start to end into a new Text object. |
| 310 | |
| 311 | Returns a new Text from the first Atom in this Text of atom type \a start. |
| 312 | */ |
| 313 | Text Text::splitAtFirst(Atom::AtomType start) { |
| 314 | if (m_first == nullptr) |
| 315 | return {}; |
| 316 | |
| 317 | Atom *previous = nullptr; |
| 318 | Atom *current = m_first; |
| 319 | |
| 320 | while (current != nullptr) { |
| 321 | if (current->type() == start) |
| 322 | break; |
| 323 | previous = current; |
| 324 | current = current->next(); |
| 325 | } |
| 326 | |
| 327 | if (!current) |
| 328 | return {}; |
| 329 | |
| 330 | Text splitText = Text(current, m_last); |
| 331 | |
| 332 | // Reset this Text's first and last atom pointers based on |
| 333 | // whether all or part of the content was extracted. |
| 334 | m_first = previous ? m_first : nullptr; |
| 335 | if (m_last = previous; m_last) |
| 336 | m_last->setNext(nullptr); |
| 337 | |
| 338 | return splitText; |
| 339 | } |
| 340 | |
| 341 | QT_END_NAMESPACE |
| 342 | |