From af4d1a20f674ec3f7d6238b4adbd86ca70846cfb Mon Sep 17 00:00:00 2001 From: mcb5637 <28106698+mcb5637@users.noreply.github.com> Date: Mon, 26 Jan 2026 11:10:25 +0100 Subject: [PATCH 1/2] Feature: RDF/XML parser (#410) Co-authored-by: Nikolaos Karalis --- CMakeLists.txt | 13 + conanfile.py | 2 + .../parser/IStreamQuadIteratorSerdImpl.cpp | 125 +++-- .../parser/IStreamQuadIteratorSerdImpl.hpp | 24 +- private/rdf4cpp/parser/XMLParser.cpp | 180 +++++++ private/rdf4cpp/parser/XMLParser.hpp | 91 ++++ .../parser/XMLParserStateTransition.hpp | 31 ++ private/rdf4cpp/parser/XMLParserUtility.cpp | 211 +++++++++ private/rdf4cpp/parser/XMLParserUtility.hpp | 151 ++++++ .../parser/XMLStates/XMLParserBaseState.cpp | 18 + .../parser/XMLStates/XMLParserBaseState.hpp | 37 ++ .../XMLStates/XMLParserCollectionState.cpp | 42 ++ .../XMLStates/XMLParserCollectionState.hpp | 41 ++ .../XMLStates/XMLParserDescriptionState.cpp | 203 ++++++++ .../XMLStates/XMLParserDescriptionState.hpp | 50 ++ .../XMLStates/XMLParserEmptyElement.cpp | 24 + .../XMLStates/XMLParserEmptyElement.hpp | 27 ++ .../XMLStates/XMLParserInitialState.cpp | 31 ++ .../XMLStates/XMLParserInitialState.hpp | 23 + .../XMLStates/XMLParserPredicateState.cpp | 48 ++ .../XMLStates/XMLParserPredicateState.hpp | 51 ++ .../parser/XMLStates/XMLParserRDFState.cpp | 24 + .../parser/XMLStates/XMLParserRDFState.hpp | 28 ++ .../XMLParserTypedLiteralPredicateState.cpp | 21 + .../XMLParserTypedLiteralPredicateState.hpp | 31 ++ .../XMLStates/XMLParserXMLLiteralState.cpp | 57 +++ .../XMLStates/XMLParserXMLLiteralState.hpp | 39 ++ src/rdf4cpp/IRIFactory.cpp | 4 + src/rdf4cpp/IRIFactory.hpp | 6 + src/rdf4cpp/parser/IStreamQuadIterator.cpp | 20 +- src/rdf4cpp/parser/IStreamQuadIterator.hpp | 37 +- src/rdf4cpp/parser/ParsingFlags.hpp | 4 +- src/rdf4cpp/parser/RDFFileParser.cpp | 2 +- src/rdf4cpp/util/CharMatcher.hpp | 55 +++ tests/CMakeLists.txt | 9 + tests/bench_SerDe.cpp | 1 + tests/parser/tests_IStreamQuadIterator.cpp | 2 +- 
tests/parser/tests_XMLParser.cpp | 447 ++++++++++++++++++ 38 files changed, 2129 insertions(+), 81 deletions(-) create mode 100644 private/rdf4cpp/parser/XMLParser.cpp create mode 100644 private/rdf4cpp/parser/XMLParser.hpp create mode 100644 private/rdf4cpp/parser/XMLParserStateTransition.hpp create mode 100644 private/rdf4cpp/parser/XMLParserUtility.cpp create mode 100644 private/rdf4cpp/parser/XMLParserUtility.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserBaseState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserBaseState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserInitialState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserInitialState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserRDFState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserRDFState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.hpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.cpp create mode 100644 private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.hpp create mode 100644 tests/parser/tests_XMLParser.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b3492d2f6..de45ed9fb 100644 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ find_package(highway REQUIRED) find_package(dice-hash REQUIRED) find_package(dice-sparse-map REQUIRED) find_package(dice-template-library REQUIRED) +find_package(libxml2 REQUIRED) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/src/rdf4cpp/version.hpp) @@ -149,6 +150,17 @@ add_library(rdf4cpp src/rdf4cpp/IRIFactory.cpp src/rdf4cpp/util/Anonymizer.cpp private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.cpp + private/rdf4cpp/parser/XMLParser.cpp + private/rdf4cpp/parser/XMLParserUtility.cpp + private/rdf4cpp/parser/XMLStates/XMLParserBaseState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.cpp + private/rdf4cpp/parser/XMLStates/XMLParserInitialState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserRDFState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.cpp + private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.cpp private/rdf4cpp/regex/RegexImpl.cpp private/rdf4cpp/regex/RegexReplacerImpl.cpp ${serd_source_files} @@ -178,6 +190,7 @@ target_link_libraries(rdf4cpp OpenSSL::Crypto uni-algo::uni-algo highway::highway + LibXml2::LibXml2 ) set_target_properties(rdf4cpp PROPERTIES diff --git a/conanfile.py b/conanfile.py index a893a463f..3376d4517 100644 --- a/conanfile.py +++ b/conanfile.py @@ -39,10 +39,12 @@ def requirements(self): self.requires("dice-hash/0.4.11", transitive_headers=True) self.requires("dice-sparse-map/0.2.9", transitive_headers=True) self.requires("dice-template-library/1.19.0", transitive_headers=True) + self.requires("libxml2/2.15.0", options={"iconv": False}) if self.options.with_test_deps: self.test_requires("doctest/2.4.11") self.test_requires("nanobench/4.3.11") + self.test_requires("libcurl/8.12.1") def 
set_name(self): if not hasattr(self, 'name') or self.version is None: diff --git a/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.cpp b/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.cpp index b5bb3f3a1..f605d3077 100644 --- a/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.cpp +++ b/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.cpp @@ -7,11 +7,11 @@ namespace rdf4cpp::parser { -std::string_view IStreamQuadIterator::Impl::node_into_string_view(SerdNode const *node) noexcept { +std::string_view IStreamQuadIterator::ImplSerd::node_into_string_view(SerdNode const *node) noexcept { return std::string_view{reinterpret_cast(node->buf), node->n_bytes}; } -ParsingError::Type IStreamQuadIterator::Impl::parsing_error_type_from_serd(SerdStatus const st) noexcept { +ParsingError::Type IStreamQuadIterator::ImplSerd::parsing_error_type_from_serd(SerdStatus const st) noexcept { switch (st) { case SERD_ERR_BAD_SYNTAX: return ParsingError::Type::BadSyntax; @@ -26,13 +26,13 @@ ParsingError::Type IStreamQuadIterator::Impl::parsing_error_type_from_serd(SerdS } } -nonstd::expected IStreamQuadIterator::Impl::get_bnode(std::string &&graph_str, SerdNode const *node) noexcept { +nonstd::expected IStreamQuadIterator::ImplSerd::get_bnode(std::string &&graph_str, SerdNode const *node) noexcept { auto const node_str = node_into_string_view(node); if (this->flags.contains(ParsingFlag::NoParseBlankNode)) { this->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "Encountered blank node while parsing. hint: blank nodes are not allowed in the current document. 
note: position may not be accurate and instead point to the end of the line."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); @@ -47,22 +47,22 @@ nonstd::expected IStreamQuadIterator::Impl::get_bnode(std::str } catch (InvalidNode const &e) { // NOTE: line, col not entirely accurate as this function is called after a triple was parsed this->last_error = ParsingError{.error_type = ParsingError::Type::BadBlankNode, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = std::string{e.what()} + ". note: position may not be accurate and instead point to the end of the triple."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); } catch (...) { this->last_error = ParsingError{.error_type = ParsingError::Type::BadBlankNode, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "Unknown internal error. 
note: position may not be accurate and instead point to the end of the triple."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); } } -nonstd::expected IStreamQuadIterator::Impl::get_iri(SerdNode const *node) noexcept { +nonstd::expected IStreamQuadIterator::ImplSerd::get_iri(SerdNode const *node) noexcept { auto const iri = [this, node]() noexcept { auto const s = node_into_string_view(node); @@ -76,8 +76,8 @@ nonstd::expected IStreamQuadIterator::Impl::get_iri(SerdNode co if (!iri.has_value()) { IRIFactoryError err = iri.error(); this->last_error = ParsingError{.error_type = ParsingError::Type::BadIri, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = std::format("invalid iri. {}. note: position may not be accurate and instead point to the end of the triple.", err)}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); @@ -86,11 +86,11 @@ nonstd::expected IStreamQuadIterator::Impl::get_iri(SerdNode co return *iri; } -nonstd::expected IStreamQuadIterator::Impl::get_prefixed_iri(SerdNode const *node) noexcept { +nonstd::expected IStreamQuadIterator::ImplSerd::get_prefixed_iri(SerdNode const *node) noexcept { if (!flags.syntax_allows_prefixes()) [[unlikely]] { this->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "Encountered prefix while parsing. hint: prefixes are not allowed in the current document. 
note: position may not be accurate and instead point to the end of the line."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); @@ -112,15 +112,15 @@ nonstd::expected IStreamQuadIterator::Impl::get_prefixed_iri(Se if (err == IRIFactoryError::UnknownPrefix) { // NOTE: line, col not entirely accurate as this function is called after a triple was parsed this->last_error = ParsingError{.error_type = ParsingError::Type::BadCurie, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "unknown prefix. note: position may not be accurate and instead point to the end of the triple."}; return nonstd::make_unexpected(SERD_ERR_BAD_CURIE); } else { this->last_error = ParsingError{.error_type = ParsingError::Type::BadIri, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = std::format("unable to expand curie into valid iri. {}. 
note: position may not be accurate and instead point to the end of the triple.", err)}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); @@ -130,7 +130,7 @@ nonstd::expected IStreamQuadIterator::Impl::get_prefixed_iri(Se return *iri; } -nonstd::expected IStreamQuadIterator::Impl::get_literal(SerdNode const *literal, SerdNode const *datatype, SerdNode const *lang) noexcept { +nonstd::expected IStreamQuadIterator::ImplSerd::get_literal(SerdNode const *literal, SerdNode const *datatype, SerdNode const *lang) noexcept { auto const literal_value = node_into_string_view(literal); auto const datatype_iri = [&]() -> std::optional> { @@ -163,15 +163,15 @@ nonstd::expected IStreamQuadIterator::Impl::get_literal(Ser } catch (InvalidNode const &e) { // NOTE: line, col not entirely accurate as this function is called after a triple was parsed this->last_error = ParsingError{.error_type = ParsingError::Type::BadLiteral, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = std::string{e.what()} + ". note: position may not be accurate and instead point to the end of the triple."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); } catch (...) { this->last_error = ParsingError{.error_type = ParsingError::Type::BadLiteral, - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "Unknown internal error. 
note: position may not be accurate and instead point to the end of the triple."}; return nonstd::make_unexpected(SERD_ERR_BAD_SYNTAX); @@ -200,8 +200,8 @@ nonstd::expected IStreamQuadIterator::Impl::get_literal(Ser return SERD_SUCCESS; } -SerdStatus IStreamQuadIterator::Impl::on_error(void *voided_self, SerdError const *error) noexcept { - auto *self = static_cast(voided_self); +SerdStatus IStreamQuadIterator::ImplSerd::on_error(void *voided_self, SerdError const *error) noexcept { + auto *self = static_cast(voided_self); size_t buf_size; SerdStatus const st = calc_required_buffer_size(error, buf_size); @@ -233,37 +233,37 @@ SerdStatus IStreamQuadIterator::Impl::on_error(void *voided_self, SerdError cons return SERD_SUCCESS; } -SerdStatus IStreamQuadIterator::Impl::on_base(void *voided_self, const SerdNode *uri) noexcept { - auto *self = static_cast(voided_self); +SerdStatus IStreamQuadIterator::ImplSerd::on_base(void *voided_self, const SerdNode *uri) noexcept { + auto *self = static_cast(voided_self); if (self->flags.contains(ParsingFlag::NoParsePrefix)) { self->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(self->reader), - .col = serd_reader_get_current_col(self->reader), + .line = serd_reader_get_current_line(self->reader.get()), + .col = serd_reader_get_current_col(self->reader.get()), .message = "Encountered base while parsing. hint: bases are not allowed in the current document. 
note: position may not be accurate and instead point to the end of the line."}; } else if (auto e = self->state->iri_factory.set_base(node_into_string_view(uri)); e != IRIFactoryError::Ok) { self->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(self->reader), - .col = serd_reader_get_current_col(self->reader), + .line = serd_reader_get_current_line(self->reader.get()), + .col = serd_reader_get_current_col(self->reader.get()), .message = std::format("Error setting base: {}. note: position may not be accurate and instead point to the end of the line.", e)}; } return SERD_SUCCESS; } -SerdStatus IStreamQuadIterator::Impl::on_prefix(void *voided_self, SerdNode const *name, SerdNode const *uri) noexcept { - auto *self = static_cast(voided_self); +SerdStatus IStreamQuadIterator::ImplSerd::on_prefix(void *voided_self, SerdNode const *name, SerdNode const *uri) noexcept { + auto *self = static_cast(voided_self); if (self->flags.contains(ParsingFlag::NoParsePrefix)) { self->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(self->reader), - .col = serd_reader_get_current_col(self->reader), + .line = serd_reader_get_current_line(self->reader.get()), + .col = serd_reader_get_current_col(self->reader.get()), .message = "Encountered prefix while parsing. hint: prefixes are not allowed in the current document. note: position may not be accurate and instead point to the end of the line."}; } else { if (self->state->iri_factory.assign_prefix(node_into_string_view(name), node_into_string_view(uri)) != IRIFactoryError::Ok) { self->last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(self->reader), - .col = serd_reader_get_current_col(self->reader), + .line = serd_reader_get_current_line(self->reader.get()), + .col = serd_reader_get_current_col(self->reader.get()), .message = std::format("Invalid prefix: {}. 
note: position may not be accurate and instead point to the end of the line.", node_into_string_view(name))}; } } @@ -271,27 +271,27 @@ SerdStatus IStreamQuadIterator::Impl::on_prefix(void *voided_self, SerdNode cons return SERD_SUCCESS; } -SerdStatus IStreamQuadIterator::Impl::inspect_node(Node const &node) noexcept { +SerdStatus IStreamQuadIterator::ImplSerd::inspect_node(Node const &node) noexcept { try { state->inspect_node_func(node); return SERD_SUCCESS; } catch (std::exception const &e) { // skip last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(reader), - .col = serd_reader_get_current_col(reader), + .line = serd_reader_get_current_line(reader.get()), + .col = serd_reader_get_current_col(reader.get()), .message = std::format("Triple explicitly skipped by inspect function: {}", e.what())}; } catch (...) { last_error = ParsingError{.error_type = ParsingError::Type::BadSyntax, - .line = serd_reader_get_current_line(reader), - .col = serd_reader_get_current_col(reader), + .line = serd_reader_get_current_line(reader.get()), + .col = serd_reader_get_current_col(reader.get()), .message = "Triple explicitly skipped by inspect function"}; } return SERD_FAILURE; } -SerdStatus IStreamQuadIterator::Impl::on_stmt(void *voided_self, +SerdStatus IStreamQuadIterator::ImplSerd::on_stmt(void *voided_self, SerdStatementFlags, SerdNode const *graph, SerdNode const *subj, @@ -300,7 +300,7 @@ SerdStatus IStreamQuadIterator::Impl::on_stmt(void *voided_self, SerdNode const *obj_datatype, SerdNode const *obj_lang) noexcept { - auto *self = static_cast(voided_self); + auto *self = static_cast(voided_self); auto const graph_node = [&]() -> nonstd::expected { if (graph != nullptr) { @@ -394,42 +394,37 @@ SerdStatus IStreamQuadIterator::Impl::on_stmt(void *voided_self, return SERD_SUCCESS; } -IStreamQuadIterator::Impl::Impl(void *stream, +IStreamQuadIterator::ImplSerd::ImplSerd(void *stream, ReadFunc read, ErrorFunc error, 
flags_type flags, state_type *initial_state) noexcept - : reader{serd_reader_new(extract_syntax_from_flags(flags), this, nullptr, &Impl::on_base, &Impl::on_prefix, &Impl::on_stmt, nullptr)}, + : reader{serd_reader_new(extract_syntax_from_flags(flags), this, nullptr, &ImplSerd::on_base, &ImplSerd::on_prefix, &ImplSerd::on_stmt, nullptr)}, state{initial_state}, state_is_owned{false}, flags{flags} { - if (this->state == nullptr) { this->state = new state_type{}; this->state_is_owned = true; } - serd_reader_set_strict(this->reader, !flags.contains(ParsingFlag::Lax)); - serd_reader_set_error_sink(this->reader, &Impl::on_error, this); - serd_reader_start_source_stream(this->reader, read, error, stream, nullptr, 4096); + serd_reader_set_strict(this->reader.get(), !flags.contains(ParsingFlag::Lax)); + serd_reader_set_error_sink(this->reader.get(), &ImplSerd::on_error, this); + serd_reader_start_source_stream(this->reader.get(), read, error, stream, nullptr, 4096); } - -IStreamQuadIterator::Impl::~Impl() noexcept { - serd_reader_end_stream(this->reader); - serd_reader_free(this->reader); - +IStreamQuadIterator::ImplSerd::~ImplSerd() { if (this->state_is_owned) { delete this->state; } } -std::optional> IStreamQuadIterator::Impl::next() { +std::optional> IStreamQuadIterator::ImplSerd::next() { while (this->quad_buffer.empty()) { if (this->last_error.has_value()) { // handle error from last time if (this->last_error_requires_skip) { this->last_error_requires_skip = false; - if (serd_reader_skip_until_byte(this->reader, '\n') != SERD_SUCCESS) { + if (serd_reader_skip_until_byte(this->reader.get(), '\n') != SERD_SUCCESS) { // EOF reached this->end_flag = true; } @@ -439,7 +434,7 @@ std::optionalreader); + SerdStatus const st = serd_reader_read_chunk(this->reader.get()); if (st == SERD_SUCCESS) { // was able to parse something @@ -455,8 +450,8 @@ std::optional not eof // but we don't really know what because the error handler was not called this->last_error = 
ParsingError{.error_type = parsing_error_type_from_serd(st), - .line = serd_reader_get_current_line(this->reader), - .col = serd_reader_get_current_col(this->reader), + .line = serd_reader_get_current_line(this->reader.get()), + .col = serd_reader_get_current_col(this->reader.get()), .message = "Unknown error"}; this->last_error_requires_skip = true; } @@ -468,12 +463,12 @@ std::optionalreader); +uint64_t IStreamQuadIterator::ImplSerd::current_line() const noexcept { + return serd_reader_get_current_line(this->reader.get()); } -uint64_t IStreamQuadIterator::Impl::current_column() const noexcept { - return serd_reader_get_current_col(this->reader); +uint64_t IStreamQuadIterator::ImplSerd::current_column() const noexcept { + return serd_reader_get_current_col(this->reader.get()); } } // namespace rdf4cpp::parser diff --git a/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.hpp b/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.hpp index aca0acc4d..21b4bd7ab 100644 --- a/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.hpp +++ b/private/rdf4cpp/parser/IStreamQuadIteratorSerdImpl.hpp @@ -14,14 +14,22 @@ namespace rdf4cpp::parser { -struct IStreamQuadIterator::Impl { +struct IStreamQuadIterator::ImplSerd final : Impl { using flags_type = IStreamQuadIterator::flags_type; using state_type = IStreamQuadIterator::state_type; using ok_type = IStreamQuadIterator::ok_type; using error_type = IStreamQuadIterator::error_type; private: - SerdReader *reader; + // workaround for gcc-14 bug, erroneously warns on unsing a lambda here + // see https://github.com/NVIDIA/stdexec/issues/1143 + struct SerdReaderDtorLambda { + void operator()(SerdReader* r) const { + serd_reader_end_stream(r); + serd_reader_free(r); + } + }; + std::unique_ptr reader; state_type *state; bool state_is_owned; @@ -33,11 +41,9 @@ struct IStreamQuadIterator::Impl { flags_type flags; -private: static std::string_view node_into_string_view(SerdNode const *node) noexcept; static ParsingError::Type 
parsing_error_type_from_serd(SerdStatus st) noexcept; -private: nonstd::expected get_bnode(std::string &&graph_str, SerdNode const *node) noexcept; nonstd::expected get_iri(SerdNode const *node) noexcept; nonstd::expected get_prefixed_iri(SerdNode const *node) noexcept; @@ -63,13 +69,13 @@ struct IStreamQuadIterator::Impl { } public: - Impl(void *stream, + ImplSerd(void *stream, ReadFunc read, ErrorFunc, flags_type flags, state_type *state) noexcept; - ~Impl() noexcept; + ~ImplSerd() override; /** * Tries to extract the next element from the serd backend. @@ -81,10 +87,10 @@ struct IStreamQuadIterator::Impl { * expected Quad: if there was a next element and it could be parsed * unexpected ParsingError: if there was a next element but it could not be parsed */ - [[nodiscard]] std::optional> next(); + [[nodiscard]] std::optional> next() override; - [[nodiscard]] uint64_t current_line() const noexcept; - [[nodiscard]] uint64_t current_column() const noexcept; + [[nodiscard]] uint64_t current_line() const noexcept override; + [[nodiscard]] uint64_t current_column() const noexcept override; }; } // namespace rdf4cpp::parser diff --git a/private/rdf4cpp/parser/XMLParser.cpp b/private/rdf4cpp/parser/XMLParser.cpp new file mode 100644 index 000000000..cc9526512 --- /dev/null +++ b/private/rdf4cpp/parser/XMLParser.cpp @@ -0,0 +1,180 @@ +#include + +#include + +#include + +namespace rdf4cpp::parser { + xmlSAXHandler IStreamQuadIterator::ImplXML::make_sax_handler() { + xmlSAXHandler r{}; + std::memset(&r, 0, sizeof(xmlSAXHandler)); + r.initialized = XML_SAX2_MAGIC; + r.getParameterEntity = get_entity; + r.getEntity = get_entity; + r.characters = on_characters; + r.startElementNs = on_start_element; + r.endElementNs = on_end_element; + r.warning = on_error; + r.error = on_error; + return r; + } + + void IStreamQuadIterator::ImplXML::handle_state_transition(StateTransition transition) { + dice::template_library::match(std::move(transition.modify_state), + [](NoStateChange) { + 
// noop + }, + [this](PopState) { + state_stack_.pop_back(); + }, + [this](S &&new_state) { + state_stack_.emplace_back(std::in_place_type, std::forward(new_state)); + } + ); + } + + // implemented here, to have access to states + bool iri_reserved(std::string_view const uri, std::string_view const local_name) { + static constexpr std::array reserved = { + xml_states::RDFState::start_element, + xml_states::DescriptionState::id_attrib, + xml_states::DescriptionState::about_attrib, + xml_states::PredicateState::parse_type_attrib, + xml_states::PredicateState::resource_attrib, + xml_states::DescriptionState::node_id_attrib, + xml_states::TypedLiteralPredicateState::datatype_attrib, + xml_states::BaseState::base_attribute, + xml_states::BaseState::lang_attribute, + std::string_view("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"), + std::string_view("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"), + std::string_view("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"), + }; + return std::ranges::any_of(reserved, [&](std::string_view const e) { + return iri_equal_pieces(e, uri, local_name); + }); + } + bool iri_core_syntax(std::string_view const uri, std::string_view const local_name) { + static constexpr std::array reserved = { + xml_states::RDFState::start_element, + xml_states::DescriptionState::id_attrib, + xml_states::DescriptionState::about_attrib, + xml_states::PredicateState::parse_type_attrib, + xml_states::PredicateState::resource_attrib, + xml_states::DescriptionState::node_id_attrib, + }; + return std::ranges::any_of(reserved, [&](std::string_view const e) { + return iri_equal_pieces(e, uri, local_name); + }); + } + bool iri_old_term(std::string_view const uri, std::string_view const local_name) { + static constexpr std::array reserved = { + std::string_view{"http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"}, + std::string_view{"http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"}, + 
std::string_view{"http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID"}, + }; + return std::ranges::any_of(reserved, [&](std::string_view const e) { + return iri_equal_pieces(e, uri, local_name); + }); + } + + void IStreamQuadIterator::ImplXML::on_error(void *th, char const *msg, ...) { // NOLINT(*-dcl50-cpp) + va_list args; + va_list args_copy; + auto t = static_cast(th); + va_start(args, msg); // NOLINT(*-pro-bounds-array-to-pointer-decay) + va_copy(args_copy, args); // NOLINT(*-pro-bounds-array-to-pointer-decay) + std::string out{}; + out.resize(1+vsnprintf(nullptr, 0, msg, args_copy), '\0'); // NOLINT(*-pro-bounds-array-to-pointer-decay) + auto l = vsnprintf(out.data(), out.size(), msg, args); // NOLINT(*-pro-bounds-array-to-pointer-decay) + if (l > 0) { + out.resize(l); + } else { + out = "unknown error, too long to fit"; + } + t->output_.add_error(ParsingError::Type::BadSyntax, std::move(out), t->make_info()); + va_end(args); // NOLINT(*-pro-bounds-array-to-pointer-decay) + va_end(args_copy); // NOLINT(*-pro-bounds-array-to-pointer-decay) + } + xmlEntity *IStreamQuadIterator::ImplXML::get_entity(void *, xmlChar const *e) { + return xmlGetPredefinedEntity(e); + } + void IStreamQuadIterator::ImplXML::on_characters(void *th, xmlChar const *e, int const len) { + auto *t = static_cast(th); + t->handle_state_transition(t->current_state().on_characters(t->output_, from_xml_char(e, len), t->make_info())); + } + void IStreamQuadIterator::ImplXML::on_start_element(void *th, xmlChar const *local_name, [[maybe_unused]] xmlChar const *prefix, xmlChar const *uri, + [[maybe_unused]] int n_namespaces, [[maybe_unused]] xmlChar const **namespaces, + int const n_attributes, [[maybe_unused]] int n_defaulted, xmlChar const **attributes) { + auto *t = static_cast(th); + t->handle_state_transition(t->current_state().on_start_element(t->output_, from_xml_char(local_name), from_xml_char(uri), + std::span{reinterpret_cast(attributes), static_cast(n_attributes)}, t->make_info())); + } + 
void IStreamQuadIterator::ImplXML::on_end_element(void *th, [[maybe_unused]] xmlChar const *local_name, [[maybe_unused]] xmlChar const *prefix, [[maybe_unused]] xmlChar const *uri) { + auto *t = static_cast(th); + t->handle_state_transition(t->current_state().on_end_element(t->output_, t->make_info())); + } + + /* Builds the XMLStateInfo handed to the state callbacks: current line and column, the innermost non-empty base IRI and language tag found on the state stack (searched from the top), and the input window reported by xmlCtxtGetInputWindow together with its offset. */ XMLStateInfo IStreamQuadIterator::ImplXML::make_info() const { + std::string_view base = ""; + for (auto const &s : state_stack_ | std::views::reverse) { + std::string_view const v = s->base; + if (!v.empty()) { + base = v; + break; + } + } + + std::string_view lang_tag = ""; + for (auto const &s : state_stack_ | std::views::reverse) { + std::string_view const v = s->lang_tag; + if (!v.empty()) { + lang_tag = v; + break; + } + } + + xmlChar const *data = nullptr; /* stays null if libxml2 does not fill it in (the call's result is not checked) */ + int size = 1024; + int off = 0; + xmlCtxtGetInputWindow(context_.get(), 0, &data, &size, &off); + std::string_view const source = from_xml_char(data, size); /* null-safe helper: yields an empty view instead of reading an indeterminate pointer */ + + return XMLStateInfo{ + current_line(), + current_column(), + base, + lang_tag, + source, + off, + }; + } + + IStreamQuadIterator::ImplXML::ImplXML(void *obj, ReadFunc const read, ErrorFunc const err, EOFFunc const eof, state_type *state) + : handler_(make_sax_handler()), + context_(xmlCreatePushParserCtxt(&handler_, this, nullptr, 0, "rdf/xml")), + reader_obj_(obj), read_func_(read), error_func_(err), eof_func_(eof), + output_(state) { + xmlCtxtSetOptions(context_.get(), XML_PARSE_NOENT | XML_PARSE_PEDANTIC | XML_PARSE_NOCDATA | XML_PARSE_NO_XXE | XML_PARSE_BIG_LINES); + state_stack_.reserve(10); + state_stack_.emplace_back(std::in_place_type); + + current_state().base = output_.current_base_iri(); + } + + std::optional IStreamQuadIterator::ImplXML::next() { + std::array buffer; // NOLINT(*-pro-type-member-init) + while (output_.empty() && error_func_(reader_obj_) == 0 && eof_func_(reader_obj_) == 0) { + auto const read = read_func_(buffer.data(), sizeof(char), buffer.size(), reader_obj_); + xmlParseChunk(context_.get(),
buffer.data(), static_cast(read), eof_func_(reader_obj_) != 0); + } + return output_.next(); + } + + uint64_t IStreamQuadIterator::ImplXML::current_line() const noexcept { + return xmlSAX2GetLineNumber(context_.get()); + } + + uint64_t IStreamQuadIterator::ImplXML::current_column() const noexcept { + return xmlSAX2GetColumnNumber(context_.get()); + } +} // namespace rdf4cpp::parser \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLParser.hpp b/private/rdf4cpp/parser/XMLParser.hpp new file mode 100644 index 000000000..bf1e38d48 --- /dev/null +++ b/private/rdf4cpp/parser/XMLParser.hpp @@ -0,0 +1,91 @@ +#ifndef RDF4CPP_XMLPARSER_H +#define RDF4CPP_XMLPARSER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +namespace rdf4cpp::parser { + struct IStreamQuadIterator::ImplXML final : Impl { + private: + xmlSAXHandler handler_; + // workaround for gcc-14 bug, erroneously warns on unsing a lambda here + // see https://github.com/NVIDIA/stdexec/issues/1143 + struct XmlParserCtxtDtorLambda { + void operator()(xmlParserCtxt *c) const { + xmlFreeParserCtxt(c); + } + }; + std::unique_ptr context_; + void *reader_obj_; + ReadFunc read_func_; + ErrorFunc error_func_; + EOFFunc eof_func_; + XMLOutputQueue output_; + + using State = dice::template_library::inplace_polymorphic; + + std::vector state_stack_; // Note: we use a vector because std::stack does not have .reserve() + + [[nodiscard]] xml_states::BaseState const ¤t_state() const noexcept { + return *state_stack_.back(); + } + + [[nodiscard]] xml_states::BaseState ¤t_state() noexcept { + return *state_stack_.back(); + } + + static xmlSAXHandler make_sax_handler(); + + void handle_state_transition(StateTransition transition); + + static void on_error(void *th, char const *msg, ...); + static xmlEntity *get_entity(void *th, xmlChar const *e); + static void 
on_characters(void *th, xmlChar const *e, int len); + static void on_start_element(void *th, xmlChar const *local_name, xmlChar const *prefix, xmlChar const *uri, + int n_namespaces, xmlChar const **namespaces, + int n_attributes, int n_defaulted, xmlChar const **attributes); + static void on_end_element(void *th, xmlChar const *local_name, xmlChar const *prefix, xmlChar const *uri); + + [[nodiscard]] XMLStateInfo make_info() const; + + public: + ImplXML(void *obj, ReadFunc read, ErrorFunc err, EOFFunc eof, state_type *state); + + ImplXML(ImplXML const &) = delete; + ImplXML &operator=(ImplXML const &) = delete; + ImplXML(ImplXML &&) = delete; + ImplXML &operator=(ImplXML &&) = delete; + ~ImplXML() override = default; + + [[nodiscard]] std::optional next() override; + + [[nodiscard]] uint64_t current_line() const noexcept override; + [[nodiscard]] uint64_t current_column() const noexcept override; + }; +} // namespace rdf4cpp::parser + +#endif //RDF4CPP_XMLPARSER_H diff --git a/private/rdf4cpp/parser/XMLParserStateTransition.hpp b/private/rdf4cpp/parser/XMLParserStateTransition.hpp new file mode 100644 index 000000000..d6cf0c390 --- /dev/null +++ b/private/rdf4cpp/parser/XMLParserStateTransition.hpp @@ -0,0 +1,31 @@ +#ifndef RDF4CPP_XMLPARSERSTATETRANSITION_H +#define RDF4CPP_XMLPARSERSTATETRANSITION_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rdf4cpp::parser { + struct StateTransition { + using ModifyStateStack = std::variant; + + ModifyStateStack modify_state; + + template + explicit StateTransition(Args &&...args) : modify_state(std::forward(args)...) 
{ + } + + StateTransition() noexcept : StateTransition(std::in_place_type) { + } + }; +} + +#endif //RDF4CPP_XMLPARSERSTATETRANSITION_H diff --git a/private/rdf4cpp/parser/XMLParserUtility.cpp b/private/rdf4cpp/parser/XMLParserUtility.cpp new file mode 100644 index 000000000..92c654f71 --- /dev/null +++ b/private/rdf4cpp/parser/XMLParserUtility.cpp @@ -0,0 +1,211 @@ +#include +#include +#include + +namespace rdf4cpp::parser { + XMLOutputQueue::XMLOutputQueue(state_type *state) : state_(state) { + if (state_ == nullptr) { + state_ = new state_type(); + state_is_owned_ = true; + } + } + + XMLOutputQueue::~XMLOutputQueue() { + if (state_is_owned_) { + delete state_; + } + } + + bool XMLOutputQueue::empty() const { + return result_queue_.empty(); + } + + std::optional XMLOutputQueue::next() { + if (result_queue_.empty()) { + return std::nullopt; + } + auto r = result_queue_.front(); + result_queue_.pop_front(); + return r; + } + + std::string_view XMLOutputQueue::current_base_iri() const { + return state_->iri_factory.get_base(); + } + + void XMLOutputQueue::add_error(ParsingError::Type ty, std::string msg, XMLStateInfo const &i) { + result_queue_.emplace_back(nonstd::unexpect, ty, i.line, i.column, std::move(msg)); + } + void XMLOutputQueue::add_old_term_error(XMLStateInfo const &i) { + add_error(ParsingError::Type::BadSyntax, "rdf:bagID, rdf:aboutEach and rdf:aboutEachPrefix were removed", i); + } + + void XMLOutputQueue::add_statement(Node subject, IRI predicate, Node object, IRI reify) { + if (subject.null() || predicate.null() || object.null()) { + return; + } + result_queue_.emplace_back(Quad(subject, predicate, object)); + if (!reify.null()) { + result_queue_.emplace_back(Quad(reify, make_hardcoded_iri(reify_subject), subject)); + result_queue_.emplace_back(Quad(reify, make_hardcoded_iri(reify_predicate), predicate)); + result_queue_.emplace_back(Quad(reify, make_hardcoded_iri(reify_object), object)); + result_queue_.emplace_back(Quad(reify, make_type_iri(), 
make_hardcoded_iri(reify_type))); + } + } + + IRI XMLOutputQueue::make_hardcoded_iri(std::string_view const iri) const { + return IRI::make_unchecked(iri, state_->node_storage); + } + + IRI XMLOutputQueue::make_type_iri() const { + return IRI::rdf_type(state_->node_storage); + } + + template + NT XMLOutputQueue::inspect_node(NT node, XMLStateInfo const &i) { + try { + state_->inspect_node_func(node); + return node; + } catch (std::exception &e) { + add_error(ParsingError::Type::BadSyntax, std::format("Triple explicitly skipped by inspect function: {}", e.what()), i); + } catch (...) { + add_error(ParsingError::Type::BadSyntax, "Triple explicitly skipped by inspect function", i); + } + return NT::make_null(); + } + + IRI XMLOutputQueue::make_iri(std::string_view const iri, std::string_view const base, XMLStateInfo const &i) { + if (base.empty()) { + state_->iri_factory.set_base_unchecked(i.base); + } else { + state_->iri_factory.set_base_unchecked(base); + } + auto exp = state_->iri_factory.from_maybe_relative(iri, state_->node_storage); + if (exp.has_value()) { + return inspect_node(*exp, i); + } else { + add_error(ParsingError::Type::BadIri, std::format("{}: {}", iri, exp.error()), i); + return IRI::make_null(); + } + } + + IRI XMLOutputQueue::make_iri(std::string_view const uri, std::string_view const local_name, std::string_view const base, XMLStateInfo const &i) { + std::string iri{uri}; + iri.append(local_name); + return make_iri(iri, base, i); + } + + bool is_ncname(std::string_view v) { + using namespace util::char_matcher_detail; + + if (v.empty()) { + return false; + } + if (!match(v)) { + return false; + } + auto r = v | una::views::utf8; + if (r.begin() == r.end()) { + return false; + } + return xml::NCNameStartChar.match(static_cast(*r.begin())); + } + + IRI XMLOutputQueue::make_id(std::string_view const local_name, std::string_view const base, XMLStateInfo const &i) { + if (!is_ncname(local_name)) { + add_error(ParsingError::Type::BadIri, 
std::format("{}: is not a valid NCName (required for rdf:ID)", local_name), i); + return IRI::make_null(); + } + std::string local = "#"; + local.append(local_name); + auto iri = make_iri(local, base, i); /* make_iri returns the null IRI after queueing its own error; a null result must not take part in the rdf:ID uniqueness bookkeeping, otherwise a later failure could be misreported as a duplicate rdf:ID */ + if (!iri.null() && reserved_ids_.contains(iri.backend_handle().id())) { + add_error(ParsingError::Type::BadIri, std::format("{}: is already used as a rdf:ID", iri), i); + return IRI::make_null(); + } + if (!iri.null()) { reserved_ids_.insert(iri.backend_handle().id()); } + return iri; + } + + Node XMLOutputQueue::make_bn(std::optional name, XMLStateInfo const &i) { + std::string n = ""; + if (!name.has_value()) { + n = std::format("{}_bn", next_bn_index_++); + name = n; + } + else if (!is_ncname(*name)) { + add_error(ParsingError::Type::BadIri, std::format("{}: is not a valid NCName (required for rdf:nodeID)", *name), i); + return IRI::make_null(); + } + try { + if (state_->blank_node_scope_manager == nullptr) { + return inspect_node(BlankNode::make(*name, state_->node_storage), i); + } else { + return inspect_node(state_->blank_node_scope_manager.scope("").get_or_generate_node(*name, state_->node_storage), i); + } + } catch (InvalidNode const &e) { + add_error(ParsingError::Type::BadBlankNode, e.what(), i); + return BlankNode::make_null(); + } catch (...)
{ + add_error(ParsingError::Type::BadBlankNode, "unknown error", i); + return BlankNode::make_null(); + } + } + + Literal XMLOutputQueue::make_literal(std::string_view value, std::optional datatype, std::optional lang_tag, XMLStateInfo const &i) { + Literal l = Literal::make_null(); + try { + if (datatype.has_value()) { + l = Literal::make_typed(value, *datatype, state_->node_storage); + } else { + if (!lang_tag.has_value() || lang_tag->empty()) { + lang_tag = i.lang_tag; + } + if (lang_tag.has_value() && !lang_tag->empty()) { + l = Literal::make_lang_tagged(value, *lang_tag, state_->node_storage); + } else { + l = Literal::make_simple(value, state_->node_storage); + } + } + } catch (InvalidNode const &e) { + add_error(ParsingError::Type::BadLiteral, e.what(), i); + } catch (...) { + add_error(ParsingError::Type::BadLiteral, "unknown error", i); + } + return inspect_node(l, i); + } + + std::string_view trim_left(std::string_view v) { + auto s = v.find_first_not_of(" \t\r\n"); + if (s == std::string_view::npos) { + return ""; + } + v.remove_prefix(s); + // ReSharper disable once CppDFALocalValueEscapesFunction + return v; + } + + bool iri_equal_pieces(std::string_view const full_iri, std::string_view const uri, std::string_view const local_name) { + if (full_iri.size() != local_name.size() + uri.size()) { + return false; + } + return full_iri.starts_with(uri) && full_iri.ends_with(local_name); + } + bool iri_in_xml_namespace(std::string_view uri, std::string_view local_name) { + static constexpr std::string_view xml_namespace = "http://www.w3.org/XML/1998/namespace"; + if (uri.length() + local_name.length() >= xml_namespace.length()) { + if (uri != xml_namespace.substr(0, uri.length())) { + return false; + } + if (uri.length() < xml_namespace.length() && !local_name.starts_with(xml_namespace.substr(uri.length()))) { + return false; + } + return true; + } + if (uri.empty() && local_name.starts_with("xml")) { + return true; + } + return false; + } + +} // namespace 
rdf4cpp::parser \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLParserUtility.hpp b/private/rdf4cpp/parser/XMLParserUtility.hpp new file mode 100644 index 000000000..cfe949abd --- /dev/null +++ b/private/rdf4cpp/parser/XMLParserUtility.hpp @@ -0,0 +1,151 @@ +#ifndef RDF4CPP_XMLPARSERSTATECOLLECTOR_H +#define RDF4CPP_XMLPARSERSTATECOLLECTOR_H + +#include + +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +namespace rdf4cpp::parser { + inline std::string_view from_xml_char(xmlChar const *s) { + if (s == nullptr) { + return ""; + } + // ReSharper disable once CppDFALocalValueEscapesFunction + return {reinterpret_cast(s)}; + } + + inline std::string_view from_xml_char(xmlChar const *s, xmlChar const *e) { + if (s == nullptr) { + return ""; + } + // ReSharper disable once CppDFALocalValueEscapesFunction + return {reinterpret_cast(s), reinterpret_cast(e)}; + } + + inline std::string_view from_xml_char(xmlChar const *s, int const n) { + if (s == nullptr) { + return ""; + } + // ReSharper disable once CppDFALocalValueEscapesFunction + return {reinterpret_cast(s), static_cast(n)}; + } + + struct XMLAttribute { + xmlChar const *local_name_raw; + xmlChar const *prefix_raw; + xmlChar const *uri_raw; + xmlChar const *value_start_raw; + xmlChar const *value_end_raw; + + [[nodiscard]] std::string_view value() const { + return from_xml_char(value_start_raw, value_end_raw); + } + + [[nodiscard]] std::string_view local_name() const { + return from_xml_char(local_name_raw); + } + + [[nodiscard]] std::string_view uri() const { + return from_xml_char(uri_raw); + } + }; + + struct XMLStateInfo { + uint64_t line; + uint64_t column; + std::string_view base; + std::string_view lang_tag; + std::string_view source; + int source_offset; + }; + + struct XMLOutputQueue { + using value_type = IStreamQuadIterator::value_type; + using state_type = IStreamQuadIterator::state_type; + + private: + std::deque result_queue_; + size_t 
next_bn_index_ = 0; + state_type *state_; + bool state_is_owned_ = false; + dice::sparse_map::sparse_set reserved_ids_; + + static constexpr std::string_view reify_subject = "http://www.w3.org/1999/02/22-rdf-syntax-ns#subject"; + static constexpr std::string_view reify_predicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate"; + static constexpr std::string_view reify_object = "http://www.w3.org/1999/02/22-rdf-syntax-ns#object"; + static constexpr std::string_view reify_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement"; + + template + [[nodiscard]] NT inspect_node(NT node, XMLStateInfo const &i); + + public: + explicit XMLOutputQueue(state_type *state); + ~XMLOutputQueue(); + + XMLOutputQueue(XMLOutputQueue const &) = delete; + XMLOutputQueue &operator=(XMLOutputQueue const &) = delete; + XMLOutputQueue(XMLOutputQueue &&) = delete; + XMLOutputQueue &operator=(XMLOutputQueue &&) = delete; + + [[nodiscard]] bool empty() const; + [[nodiscard]] std::optional next(); + [[nodiscard]] std::string_view current_base_iri() const; + + void add_error(ParsingError::Type ty, std::string msg, XMLStateInfo const &i); + void add_old_term_error(XMLStateInfo const &i); + /** + * add statement to the output list, if none of the components is null + * (null is used to track an already inserted parse error for that component) + */ + void add_statement(Node subject, IRI predicate, Node object, IRI reify); + /** + * create an IRI with no checks, intended for hardcoded IRIs like reify_subject + */ + [[nodiscard]] IRI make_hardcoded_iri(std::string_view iri) const; + [[nodiscard]] IRI make_type_iri() const; + [[nodiscard]] IRI make_iri(std::string_view iri, std::string_view base, XMLStateInfo const &i); + [[nodiscard]] IRI make_iri(std::string_view uri, std::string_view local_name, std::string_view base, XMLStateInfo const &i); + /** + * create the IRI for an id_attrib, including uniqueness check + */ + [[nodiscard]] IRI make_id(std::string_view local_name, 
std::string_view base, XMLStateInfo const &i); + [[nodiscard]] Node make_bn(std::optional name, XMLStateInfo const &i); + /** + * creates a literal + * @param value + * @param datatype + * @param lang_tag (ignored, if datatype is set) + * @param i + * @return + */ + [[nodiscard]] Literal make_literal(std::string_view value, std::optional datatype, std::optional lang_tag, XMLStateInfo const &i); + }; + + struct PopState {}; + struct NoStateChange {}; + + struct StateTransition; + + /** + * removes whitespace according to xml spec + */ + [[nodiscard]] std::string_view trim_left(std::string_view v); + [[nodiscard]] bool iri_equal_pieces(std::string_view full_iri, std::string_view uri, std::string_view local_name); + [[nodiscard]] bool iri_reserved(std::string_view uri, std::string_view local_name); + [[nodiscard]] bool iri_core_syntax(std::string_view uri, std::string_view local_name); + [[nodiscard]] bool iri_old_term(std::string_view uri, std::string_view local_name); + [[nodiscard]] bool iri_in_xml_namespace(std::string_view uri, std::string_view local_name); +} // namespace rdf4cpp::parser + +#endif //RDF4CPP_XMLPARSERSTATECOLLECTOR_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.cpp new file mode 100644 index 000000000..773f92003 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.cpp @@ -0,0 +1,18 @@ +#include + +namespace rdf4cpp::parser::xml_states { + BaseState::InheritedAttributeInfo BaseState::get_inherited_attributes(XMLOutputQueue &out, std::span const attributes, XMLStateInfo const &info) { + InheritedAttributeInfo r{}; + for (auto const &a : attributes) { + if (iri_equal_pieces(base_attribute, a.uri(), a.local_name())) { + if (auto e = IRIView(a.value()).quick_validate(); e != IRIFactoryError::Ok) { + out.add_error(ParsingError::Type::BadIri, std::format("invalid base IRI ({}): {}", e, a.value()), info); + } + r.base = a.value(); + } else if 
(iri_equal_pieces(lang_attribute, a.uri(), a.local_name())) { + r.lang_tag = a.value(); + } + } + return r; + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.hpp new file mode 100644 index 000000000..8f3d1e0d3 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserBaseState.hpp @@ -0,0 +1,37 @@ +#ifndef RDF4CPP_XMLPARSERBASESTATE_H +#define RDF4CPP_XMLPARSERBASESTATE_H + +#include + +namespace rdf4cpp::parser::xml_states { + /** + * most states handle one or more of the elements in https://www.w3.org/TR/rdf11-xml/#section-Infoset-Grammar . + * note that the creation of a state is done by on_start_element of the previous state. + * each state holds information on base iri and language tag defined on the corresponding xml element. + */ + struct BaseState { // NOLINT(*-special-member-functions) + virtual ~BaseState() = default; + [[nodiscard]] virtual StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) = 0; + [[nodiscard]] virtual StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) = 0; + [[nodiscard]] virtual StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) = 0; + virtual void move_to(BaseState *b) noexcept = 0; + + struct InheritedAttributeInfo { + std::string_view base = ""; + std::string_view lang_tag = ""; + }; + + std::string base; + std::string lang_tag; + + explicit BaseState(InheritedAttributeInfo const &i) + : base(i.base), lang_tag(i.lang_tag) { + } + + static constexpr std::string_view base_attribute = "http://www.w3.org/XML/1998/namespacebase"; + static constexpr std::string_view lang_attribute = "http://www.w3.org/XML/1998/namespacelang"; + static InheritedAttributeInfo get_inherited_attributes(XMLOutputQueue &out, std::span 
attributes, XMLStateInfo const &info); + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERBASESTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.cpp new file mode 100644 index 000000000..424c93d15 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.cpp @@ -0,0 +1,42 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition CollectionState::on_characters(XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected element, found characters", info); + } + return {}; + } + + StateTransition CollectionState::on_start_element(XMLOutputQueue &out, std::string_view const local_name, std::string_view const uri, std::span const attributes, XMLStateInfo const &info) { + // https://www.w3.org/TR/rdf11-xml/#parseTypeCollectionPropertyElt + // only node elements (=>DescriptionState) can appear in the list + auto [transition, obj] = DescriptionState::enter(out, local_name, uri, attributes, info); + if (first) { + first = false; + last_bn = out.make_bn(std::nullopt, info); + out.add_statement(subject, predicate, last_bn, reify); + } else { + auto const bn = out.make_bn(std::nullopt, info); + out.add_statement(last_bn, out.make_hardcoded_iri(iri_rest), bn, IRI::make_null()); + last_bn = bn; + } + out.add_statement(last_bn, out.make_hardcoded_iri(iri_first), obj, IRI::make_null()); + return transition; + } + + StateTransition CollectionState::on_end_element(XMLOutputQueue &out, [[maybe_unused]] XMLStateInfo const &info) { + auto const nil = out.make_hardcoded_iri(iri_nil); + if (first) { + out.add_statement(subject, predicate, nil, reify); + } else { + out.add_statement(last_bn, out.make_hardcoded_iri(iri_rest), nil, IRI::make_null()); + } + return StateTransition{std::in_place_type}; + } + void 
CollectionState::move_to(BaseState *b) noexcept { + new (b) CollectionState(std::move(*this)); + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.hpp new file mode 100644 index 000000000..3ea1eb934 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserCollectionState.hpp @@ -0,0 +1,41 @@ +#ifndef XMLPARSERCOLLECTIONSTATE_HPP +#define XMLPARSERCOLLECTIONSTATE_HPP + +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#parseTypeCollectionPropertyElt + * + * example: + * + * + * ... + * ... + * ... + * + * + */ + struct CollectionState final : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + Node subject; + IRI predicate; + Node last_bn = Node::make_null(); + IRI reify; + bool first = true; + + CollectionState(InheritedAttributeInfo const &i, Node sub, IRI pred, IRI reify) + : BaseState(i), subject(sub), predicate(pred), reify(reify) { + } + + static constexpr std::string_view iri_nil = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"; + static constexpr std::string_view iri_rest = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"; + static constexpr std::string_view iri_first = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"; + }; +} // namespace rdf4cpp::parser::xml_states + +#endif // XMLPARSERCOLLECTIONSTATE_HPP \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.cpp 
b/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.cpp new file mode 100644 index 000000000..adedc2d60 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.cpp @@ -0,0 +1,203 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition DescriptionState::on_characters(XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected predicate, found characters", info); + } + return {}; + } + + StateTransition DescriptionState::on_start_element(XMLOutputQueue &out, std::string_view const local_name, std::string_view const uri, std::span attributes, XMLStateInfo const &info) { + if (iri_core_syntax(uri, local_name)) { + out.add_error(ParsingError::Type::BadSyntax, "core syntax terms are not allowed as predicates", info); + return StateTransition(std::in_place_type); + } + if (iri_equal_pieces(start_element, uri, local_name)) { + out.add_error(ParsingError::Type::BadSyntax, "rdf:Description is not allowed as predicate", info); + return StateTransition(std::in_place_type); + } + if (iri_old_term(uri, local_name)) { + out.add_old_term_error(info); + return StateTransition(std::in_place_type); + } + + auto const inherited_attribute_info = get_inherited_attributes(out, attributes, info); + IRI predicate; + if (iri_equal_pieces(PredicateState::list_start_element, uri, local_name)) { + predicate = out.make_iri(std::format("http://www.w3.org/1999/02/22-rdf-syntax-ns#_{}", list_current++), inherited_attribute_info.base, info); + } else { + predicate = out.make_iri(uri, local_name, inherited_attribute_info.base, info); + } + std::optional datatype = std::nullopt; + std::optional sub = std::nullopt; + auto check_only_one = [&sub, &out, &info]() { + if (sub.has_value()) { + out.add_error(ParsingError::Type::BadSyntax, "expected only one of rdf:ID, rdf:about, and rdf:nodeID", info); + return true; + } + return false; 
+ }; + IRI reify = IRI::make_null(); + bool parse_resource = false; + bool parse_literal = false; + bool parse_collection = false; + for (auto const &att : attributes) { + if (iri_equal_pieces(TypedLiteralPredicateState::datatype_attrib, att.uri(), att.local_name())) { + datatype = out.make_iri(att.value(), inherited_attribute_info.base, info); + } else if (iri_equal_pieces(PredicateState::resource_attrib, att.uri(), att.local_name())) { + check_only_one(); + sub = out.make_iri(att.value(), inherited_attribute_info.base, info); + } else if (iri_equal_pieces(node_id_attrib, att.uri(), att.local_name())) { + check_only_one(); + sub = out.make_bn(att.value(), info); + } else if (iri_equal_pieces(id_attrib, att.uri(), att.local_name())) { + reify = out.make_id(att.value(), inherited_attribute_info.base, info); + } else if (iri_equal_pieces(PredicateState::parse_type_attrib, att.uri(), att.local_name())) { + if (att.value() == PredicateState::parse_type_resource) { + parse_resource = true; + } else if (att.value() == PredicateState::parse_type_collection) { + parse_collection = true; + } else { // literal is the default case thats supposed to be used if anything unknown appears + parse_literal = true; + } + } + } + // need to loop twice, because anything in the second loop needs a established sub + // and the xml spec allows attributes in arbitrary order + for (auto const &att : attributes) { + if (iri_equal_pieces(PredicateState::list_start_element, att.uri(), att.local_name())) { + out.add_error(ParsingError::Type::BadSyntax, "rdf:li is not allowed as attribute", info); + continue; + } + if (iri_old_term(att.uri(), att.local_name())) { + out.add_old_term_error(info); + continue; + } + if (PredicateState::iri_reserved_predicate(att.uri(), att.local_name())) { + continue; + } + // the only reference i found to this is: https://github.com/w3c/rdf-tests/blob/main/rdf/rdf11/rdf-xml/unrecognised-xml-attributes/test001.rdf + if (iri_in_xml_namespace(att.uri(), 
att.local_name())) { + continue; + } + if (!sub.has_value()) { + sub = out.make_bn(std::nullopt, info); + } + if (iri_equal_pieces(type_attrib, att.uri(), att.local_name())) { + IRI const obj = out.make_iri(att.value(), inherited_attribute_info.base, info); /* resolve against the property element's own xml:base, as the rdf:resource / rdf:datatype handling earlier in this function and DescriptionState::enter do; the member `base` belongs to the enclosing node element */ + out.add_statement(*sub, out.make_type_iri(), obj, IRI::make_null()); + } else { + IRI const pred = out.make_iri(att.uri(), att.local_name(), inherited_attribute_info.base, info); + Literal const obj = out.make_literal(att.value(), std::nullopt, inherited_attribute_info.lang_tag, info); + out.add_statement(*sub, pred, obj, IRI::make_null()); + } + } + if (sub.has_value() && (parse_collection || parse_literal || parse_resource)) { + out.add_error(ParsingError::Type::BadSyntax, "expected only one of rdf:parseType, rdf:nodeID and rdf:resource", info); + } + if (datatype.has_value()) { + return StateTransition(std::in_place_type, inherited_attribute_info, subject, predicate, reify, *datatype); + } else if (sub.has_value()) { + out.add_statement(subject, predicate, *sub, reify); + return StateTransition(std::in_place_type); // predicate is expected to be empty, object defined as attribute + // example: https://www.w3.org/2013/RDFXMLTests/rdfms-empty-property-elements/test013.rdf + } else if (parse_resource) { + Node const obj = out.make_bn(std::nullopt, info); + out.add_statement(subject, predicate, obj, reify); + return StateTransition(std::in_place_type, inherited_attribute_info, obj); + } else if (parse_literal) { + return StateTransition(std::in_place_type, inherited_attribute_info, subject, predicate, reify, info); + } else if (parse_collection) { + return StateTransition(std::in_place_type, inherited_attribute_info, subject, predicate, reify); + } else { + return StateTransition(std::in_place_type, inherited_attribute_info, subject, predicate, reify); + } + } + + StateTransition DescriptionState::on_end_element([[maybe_unused]] XMLOutputQueue &out, [[maybe_unused]] XMLStateInfo const &info) { + return StateTransition{std::in_place_type}; + } + void
DescriptionState::move_to(BaseState *b) noexcept { + new (b) DescriptionState(std::move(*this)); + } + std::pair DescriptionState::enter(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) { + auto const inherited_attribute_info = get_inherited_attributes(out, attributes, info); + Node sub = Node::make_null(); + auto check_only_one = [&sub, &out, &info]() { + if (!sub.null()) { + out.add_error(ParsingError::Type::BadSyntax, "expected only one of rdf:ID, rdf:about, and rdf:nodeID", info); + return true; + } + return false; + }; + for (auto const &att : attributes) { + if (iri_equal_pieces(about_attrib, att.uri(), att.local_name())) { + if (check_only_one()) { + continue; + } + sub = out.make_iri(att.value(), inherited_attribute_info.base, info); + } else if (iri_equal_pieces(id_attrib, att.uri(), att.local_name())) { + if (check_only_one()) { + continue; + } + sub = out.make_id(att.value(), inherited_attribute_info.base, info); + } else if (iri_equal_pieces(node_id_attrib, att.uri(), att.local_name())) { + if (check_only_one()) { + continue; + } + sub = out.make_bn(att.value(), info); + } + } + if (sub.null()) { + sub = out.make_bn(std::nullopt, info); + } + if (!iri_equal_pieces(start_element, uri, local_name)) { + if (iri_equal_pieces(PredicateState::list_start_element, uri, local_name)) { + out.add_error(ParsingError::Type::BadSyntax, "rdf:li is not allowed as element type", info); + } + else if (iri_core_syntax(uri, local_name)) { + out.add_error(ParsingError::Type::BadSyntax, "core syntax terms are not allowed as element type", info); + } + else if (iri_old_term( uri, local_name)) { + out.add_old_term_error(info); + } + else { + IRI const obj = out.make_iri(uri, local_name, inherited_attribute_info.base, info); + if (!obj.null()) { + out.add_statement(sub, out.make_type_iri(), obj, IRI::make_null()); + } + } + } + for (auto const &att : attributes) { + if 
(iri_equal_pieces(PredicateState::list_start_element, att.uri(), att.local_name())) { + out.add_error(ParsingError::Type::BadSyntax, "rdf:li is not allowed as attribute", info); + continue; + } + if (iri_old_term(att.uri(), att.local_name())) { + out.add_old_term_error(info); + continue; + } + if (PredicateState::iri_reserved_predicate(att.uri(), att.local_name())) { + continue; + } + if (iri_in_xml_namespace(att.uri(), att.local_name())) { + continue; + } + if (iri_equal_pieces(type_attrib, att.uri(), att.local_name())) { + IRI const obj = out.make_iri(att.value(), inherited_attribute_info.base, info); + out.add_statement(sub, out.make_type_iri(), obj, IRI::make_null()); + } else { + IRI const pred = out.make_iri(att.uri(), att.local_name(), inherited_attribute_info.base, info); + Literal const obj = out.make_literal(att.value(), std::nullopt, inherited_attribute_info.lang_tag, info); + out.add_statement(sub, pred, obj, IRI::make_null()); + } + } + return { + StateTransition{std::in_place_type, inherited_attribute_info, sub}, + sub, + }; + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.hpp new file mode 100644 index 000000000..173184328 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserDescriptionState.hpp @@ -0,0 +1,50 @@ +#ifndef RDF4CPP_XMLPARSERDESCRIPTIONSTATE_H +#define RDF4CPP_XMLPARSERDESCRIPTIONSTATE_H + +#include +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#nodeElementList and https://www.w3.org/TR/rdf11-xml/#nodeElement + * on_start_element checks and dispatches for the different options in https://www.w3.org/TR/rdf11-xml/#propertyEltList + * (https://www.w3.org/TR/rdf11-xml/#parseTypeResourcePropertyElt has no own state, instead gets handled directly by on_start_element) + * + * example: + * + * ... 
+ * + */ + struct DescriptionState final : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + Node subject; + size_t list_current = 1; + + explicit DescriptionState(InheritedAttributeInfo const &i, Node sub) + : BaseState(i), subject(sub) { + } + + /** + * enters a description state + * @param out + * @param local_name + * @param uri + * @param attributes + * @param info + * @return transition & the node this state represents, to be used as object in parent states + */ + static std::pair enter(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info); + + static constexpr std::string_view start_element = "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"; + static constexpr std::string_view about_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#about"; + static constexpr std::string_view id_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID"; + static constexpr std::string_view node_id_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID"; + static constexpr std::string_view type_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERDESCRIPTIONSTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.cpp new file mode 100644 index 000000000..4bd4e50ed --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.cpp @@ -0,0 +1,24 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition 
EmptyElement::on_characters(XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected end of element, found characters", info); + } + return {}; + } + + StateTransition EmptyElement::on_start_element(XMLOutputQueue &out, [[maybe_unused]] std::string_view local_name, [[maybe_unused]] std::string_view uri, [[maybe_unused]] std::span attributes, XMLStateInfo const &info) { + out.add_error(ParsingError::Type::BadSyntax, "expected end of element, found ???", info); + return {}; + } + + StateTransition EmptyElement::on_end_element([[maybe_unused]] XMLOutputQueue &out, [[maybe_unused]] XMLStateInfo const &info) { + return StateTransition{std::in_place_type}; + } + void EmptyElement::move_to(BaseState *b) noexcept { + new (b) EmptyElement(std::move(*this)); + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.hpp new file mode 100644 index 000000000..4cfd62ce0 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserEmptyElement.hpp @@ -0,0 +1,27 @@ +#ifndef XMLPARSEREMPTYELEMENT_HPP +#define XMLPARSEREMPTYELEMENT_HPP + +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#emptyPropertyElt (if attributes are present) + * + * example: + * + * + * + */ + struct EmptyElement final : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + EmptyElement() + : BaseState({}) { + } + }; +} // 
namespace rdf4cpp::parser::xml_states + +#endif // XMLPARSEREMPTYELEMENT_HPP \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.cpp new file mode 100644 index 000000000..287677d2f --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.cpp @@ -0,0 +1,31 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition InitialState::on_characters(XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected RDF or Description, found characters", info); + } + return {}; + } + + StateTransition InitialState::on_start_element(XMLOutputQueue &out, std::string_view const local_name, std::string_view const uri, [[maybe_unused]] std::span attributes, XMLStateInfo const &info) { + if (iri_equal_pieces(RDFState::start_element, uri, local_name)) { + return StateTransition{ + std::in_place_type, + get_inherited_attributes(out, attributes, info), + }; + } + auto [trans, _] = DescriptionState::enter(out, local_name, uri, attributes, info); + return trans; + } + + StateTransition InitialState::on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) { + out.add_error(ParsingError::Type::BadSyntax, "expected RDF or Description, found end of initial state?", info); + return {}; + } + void InitialState::move_to(BaseState *b) noexcept { + new (b) InitialState(std::move(*this)); + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.hpp new file mode 100644 index 000000000..913bb1d3d --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserInitialState.hpp @@ -0,0 +1,23 @@ +#ifndef RDF4CPP_XMLPARSERINITIALSTATE_H +#define RDF4CPP_XMLPARSERINITIALSTATE_H + +#include +#include + 
+namespace rdf4cpp::parser::xml_states { + /** + * initial state, checks for start of https://www.w3.org/TR/rdf11-xml/#RDF + */ + struct InitialState final : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + InitialState() + : BaseState({}) { + } + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERINITIALSTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.cpp new file mode 100644 index 000000000..a39c5c49c --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.cpp @@ -0,0 +1,48 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + + StateTransition PredicateState::on_characters([[maybe_unused]] XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (done) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected end of element, found literal", info); + } + return {}; + } + literal.append(chars); + return {}; + } + + StateTransition PredicateState::on_start_element(XMLOutputQueue &out, std::string_view const local_name, std::string_view const uri, std::span const attributes, XMLStateInfo const &info) { + if (!trim_left(literal).empty()) { + out.add_error(ParsingError::Type::BadSyntax, "expected end of element or literal, found element", info); + return {}; + } + if (done) { + out.add_error(ParsingError::Type::BadSyntax, "expected end of element, found element", info); + return {}; + } + auto [transition, obj] = DescriptionState::enter(out, local_name, uri, attributes, info); + 
done = true; + out.add_statement(subject, predicate, obj, reify); + return transition; + } + + StateTransition PredicateState::on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) { + if (!done) { + Literal const lit = out.make_literal(literal, std::nullopt, std::nullopt, info); + out.add_statement(subject, predicate, lit, reify); + } + return StateTransition{std::in_place_type}; + } + void PredicateState::move_to(BaseState *b) noexcept { + new (b) PredicateState(std::move(*this)); + } + + bool PredicateState::iri_reserved_predicate(std::string_view const uri, std::string_view const local_name) { + return iri_reserved(uri, local_name) || iri_equal_pieces(DescriptionState::start_element, uri, local_name) || iri_equal_pieces(list_start_element, uri, local_name); + } + +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.hpp new file mode 100644 index 000000000..a600d1b77 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserPredicateState.hpp @@ -0,0 +1,51 @@ +#ifndef RDF4CPP_XMLPARSERPREDICATESTATE_H +#define RDF4CPP_XMLPARSERPREDICATESTATE_H + +#include +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#resourcePropertyElt (nested nodeElement / DescriptionState + * and https://www.w3.org/TR/rdf11-xml/#literalPropertyElt (literal with no datatype attribute) + * and https://www.w3.org/TR/rdf11-xml/#emptyPropertyElt (with no attributes (empty literal)) + * + * example: + * + * + * + * ... 
+ * + * + * foo + * + * + */ + struct PredicateState : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + Node subject; + IRI predicate; + IRI reify; + std::string literal; + bool done = false; + + PredicateState(InheritedAttributeInfo const &i, Node sub, IRI predicate, IRI reify) + : BaseState(i), subject(sub), predicate(predicate), reify(reify) { + } + + static constexpr std::string_view resource_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"; + static constexpr std::string_view parse_type_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#parseType"; + static constexpr std::string_view parse_type_resource = "Resource"; + static constexpr std::string_view parse_type_literal = "Literal"; + static constexpr std::string_view parse_type_collection = "Collection"; + static constexpr std::string_view list_start_element = "http://www.w3.org/1999/02/22-rdf-syntax-ns#li"; + + static bool iri_reserved_predicate(std::string_view uri, std::string_view local_name); + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERPREDICATESTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.cpp new file mode 100644 index 000000000..81884b0a6 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.cpp @@ -0,0 +1,24 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition RDFState::on_characters(XMLOutputQueue &out, std::string_view const chars, XMLStateInfo const &info) { + if (!trim_left(chars).empty()) { + out.add_error(ParsingError::Type::BadSyntax, 
"expected Description, found characters", info); + } + return {}; + } + + StateTransition RDFState::on_start_element(XMLOutputQueue &out, std::string_view const local_name, std::string_view const uri, std::span const attributes, XMLStateInfo const &info) { + auto [trans, _] = DescriptionState::enter(out, local_name, uri, attributes, info); + return trans; + } + + StateTransition RDFState::on_end_element([[maybe_unused]] XMLOutputQueue &out, [[maybe_unused]] XMLStateInfo const &info) { + return StateTransition{std::in_place_type}; + } + void RDFState::move_to(BaseState *b) noexcept { + new (b) RDFState(std::move(*this)); + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.hpp new file mode 100644 index 000000000..4c69e2aeb --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserRDFState.hpp @@ -0,0 +1,28 @@ +#ifndef RDF4CPP_XMLPARSERRDFSTATE_H +#define RDF4CPP_XMLPARSERRDFSTATE_H + +#include +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#RDF + * + * example: + * + * ... 
+ * + */ + struct RDFState final : BaseState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + static constexpr std::string_view start_element = "http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF"; + + using BaseState::BaseState; + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERRDFSTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.cpp new file mode 100644 index 000000000..d2c1c2357 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.cpp @@ -0,0 +1,21 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition TypedLiteralPredicateState::on_start_element(XMLOutputQueue &out, [[maybe_unused]] std::string_view local_name, [[maybe_unused]] std::string_view uri, [[maybe_unused]] std::span attributes, XMLStateInfo const &info) { + out.add_error(ParsingError::Type::BadSyntax, "expected literal, found element", info); + return {}; + } + + StateTransition TypedLiteralPredicateState::on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) { + if (!datatype.null()) { + Literal const lit = out.make_literal(literal, datatype, std::nullopt, info); + out.add_statement(subject, predicate, lit, reify); + } + return StateTransition{std::in_place_type}; + } + void TypedLiteralPredicateState::move_to(BaseState *b) noexcept { + new (b) TypedLiteralPredicateState(std::move(*this)); + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git 
a/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.hpp new file mode 100644 index 000000000..9ab297877 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserTypedLiteralPredicateState.hpp @@ -0,0 +1,31 @@ +#ifndef RDF4CPP_XMLPARSERTYPEDLITERALPREDICATESTATE_H +#define RDF4CPP_XMLPARSERTYPEDLITERALPREDICATESTATE_H + +#include +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#literalPropertyElt (with datatype attribute) + * + * example: + * + * 10 + * + */ + struct TypedLiteralPredicateState final : PredicateState { + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + IRI datatype; + + TypedLiteralPredicateState(InheritedAttributeInfo const &i, Node iri, IRI predicate, IRI reify, IRI datatype) + : PredicateState(i, iri, predicate, reify), datatype(datatype) { + } + + static constexpr std::string_view datatype_attrib = "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype"; + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERTYPEDLITERALPREDICATESTATE_H diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.cpp b/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.cpp new file mode 100644 index 000000000..509361af3 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.cpp @@ -0,0 +1,57 @@ +#include + +#include + +namespace rdf4cpp::parser::xml_states { + StateTransition XMLLiteralState::on_characters([[maybe_unused]] XMLOutputQueue &out, [[maybe_unused]] std::string_view chars, XMLStateInfo const &info) { + source_input(info); + return {}; + } + + StateTransition 
XMLLiteralState::on_start_element([[maybe_unused]] XMLOutputQueue &out, [[maybe_unused]] std::string_view local_name, [[maybe_unused]] std::string_view uri, [[maybe_unused]] std::span attributes, XMLStateInfo const &info) { + ++depth; + source_input(info); + return {}; + } + + StateTransition XMLLiteralState::on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) { + if (depth > 0) { + --depth; + source_input(info); + return {}; + } + IRI datatype = out.make_hardcoded_iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"); + // filter out the parts of the source that are not part of the literal + std::string_view l = literal; + l = l.substr(0, last_offset); + l.remove_prefix(data_start); + // filter out the end of the start tag + // this tag belongs to the predicate + if (!l.empty() && l[0] == '/') { + l.remove_prefix(1); + } + if (!l.empty() && l[0] == '>') { + l.remove_prefix(1); + } + Literal const lit = out.make_literal(l, datatype, std::nullopt, info); + out.add_statement(subject, predicate, lit, reify); + return StateTransition{std::in_place_type}; + } + void XMLLiteralState::move_to(BaseState *b) noexcept { + new (b) XMLLiteralState(std::move(*this)); + } + + void XMLLiteralState::source_input(XMLStateInfo const &info) { + // collect all the different source parts and append them + int const off = info.source_offset; + std::string_view const sv = info.source; + if (literal.empty()) { + data_start = off; + } + if (!static_cast(literal).ends_with(sv)) { + last_size = literal.size(); + literal += sv; + } + last_offset = static_cast(off) + last_size; + } +} // namespace rdf4cpp::parser::xml_states \ No newline at end of file diff --git a/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.hpp b/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.hpp new file mode 100644 index 000000000..96c7acba5 --- /dev/null +++ b/private/rdf4cpp/parser/XMLStates/XMLParserXMLLiteralState.hpp @@ -0,0 +1,39 @@ +#ifndef RDF4CPP_XMLPARSERXMLLITERALESTATE_H 
+#define RDF4CPP_XMLPARSERXMLLITERALESTATE_H + +#include +#include + +namespace rdf4cpp::parser::xml_states { + /** + * state for https://www.w3.org/TR/rdf11-xml/#parseTypeLiteralPropertyElt + * (and https://www.w3.org/TR/rdf11-xml/#parseTypeOtherPropertyElt) + * + * example: + * + * + * + * + * note: + * this implementation does not match the specification exactly (omitting namespaces) + */ + struct XMLLiteralState final : PredicateState { + StateTransition on_characters(XMLOutputQueue &out, std::string_view chars, XMLStateInfo const &info) override; + StateTransition on_start_element(XMLOutputQueue &out, std::string_view local_name, std::string_view uri, std::span attributes, XMLStateInfo const &info) override; + StateTransition on_end_element(XMLOutputQueue &out, XMLStateInfo const &info) override; + void move_to(BaseState *b) noexcept override; + + size_t depth = 0; + size_t data_start = 0; + size_t last_offset = 0; + size_t last_size = 0; + + void source_input(XMLStateInfo const &info); + + XMLLiteralState(InheritedAttributeInfo const &i, Node sub, IRI predicate, IRI reify, XMLStateInfo const &info) : PredicateState(i, sub, predicate, reify) { + source_input(info); + } + }; +} // namespace rdf4cpp::parser::xml_states + +#endif //RDF4CPP_XMLPARSERXMLLITERALESTATE_H diff --git a/src/rdf4cpp/IRIFactory.cpp b/src/rdf4cpp/IRIFactory.cpp index 52fdd444f..4e1e86873 100644 --- a/src/rdf4cpp/IRIFactory.cpp +++ b/src/rdf4cpp/IRIFactory.cpp @@ -292,5 +292,9 @@ IRIFactoryError IRIFactory::set_base(std::string_view b) noexcept { base_parts_cache = IRIView{base}.all_parts(); return IRIFactoryError::Ok; } +void IRIFactory::set_base_unchecked(std::string_view b) noexcept { + base = b; + base_parts_cache = IRIView{base}.all_parts(); +} } // namespace rdf4cpp diff --git a/src/rdf4cpp/IRIFactory.hpp b/src/rdf4cpp/IRIFactory.hpp index 4ba20526e..d2549f7d5 100644 --- a/src/rdf4cpp/IRIFactory.hpp +++ b/src/rdf4cpp/IRIFactory.hpp @@ -105,6 +105,12 @@ struct IRIFactory { * @return */ 
[[nodiscard]] IRIFactoryError set_base(std::string_view b) noexcept; + /** + * Changes the base IRI. Skips validating the new base IRI. + * @param b + * @return + */ + void set_base_unchecked(std::string_view b) noexcept; /** * validates the given IRI and creates it in the given node storage, if valid. diff --git a/src/rdf4cpp/parser/IStreamQuadIterator.cpp b/src/rdf4cpp/parser/IStreamQuadIterator.cpp index 677a8522c..1abc99c91 100644 --- a/src/rdf4cpp/parser/IStreamQuadIterator.cpp +++ b/src/rdf4cpp/parser/IStreamQuadIterator.cpp @@ -1,5 +1,7 @@ #include "IStreamQuadIterator.hpp" + #include +#include #include @@ -38,19 +40,33 @@ static int istream_error(void *voided_self) noexcept { return static_cast(self->fail() && !self->eof()); } +/** + * Adaptor function for serd to check if an std::istream is at the end of file + * + * @param voided_self pointer to std::istream cast to void * + * @return whether the given istream has reached end of file (cast to int) + */ +static int istream_eof(void *voided_self) noexcept { + auto *self = static_cast(voided_self); + return static_cast(self->eof()); +} + IStreamQuadIterator::IStreamQuadIterator(void *stream, ReadFunc read, ErrorFunc error, + EOFFunc eof, flags_type flags, state_type *state) - : impl{std::make_unique(stream, read, error, flags, state)}, + : impl{flags.get_syntax() == ParsingFlag::RdfXml ? 
+ static_cast>(std::make_unique(stream, read, error, eof, state)) : + std::make_unique(stream, read, error, flags, state)}, cur{impl->next()} { } IStreamQuadIterator::IStreamQuadIterator(std::istream &istream, flags_type flags, state_type *state) - : IStreamQuadIterator{&istream, &istream_read, &istream_error, flags, state} { + : IStreamQuadIterator{&istream, &istream_read, &istream_error, &istream_eof, flags, state} { } IStreamQuadIterator::IStreamQuadIterator(IStreamQuadIterator &&other) noexcept = default; diff --git a/src/rdf4cpp/parser/IStreamQuadIterator.hpp b/src/rdf4cpp/parser/IStreamQuadIterator.hpp index 6cc27239e..47148b070 100644 --- a/src/rdf4cpp/parser/IStreamQuadIterator.hpp +++ b/src/rdf4cpp/parser/IStreamQuadIterator.hpp @@ -35,6 +35,15 @@ using ReadFunc = size_t (*)(void *buffer, size_t elem_size, size_t count, void * */ using ErrorFunc = int (*)(void *stream); +/** + * Identical semantics to feof. + * + * + * @param stream pointer to any object + * @return nonzero value if the end of stream has been reached, zero value otherwise + */ +using EOFFunc = int (*)(void *stream); + /** * Similar to std::istream_iterator<>. * Parses the given istream and tries to extract Quads given in TURTLE format. @@ -70,11 +79,34 @@ struct IStreamQuadIterator { using istream_type = std::istream; private: - struct Impl; + struct Impl { + virtual ~Impl() = default; + /** + * Tries to extract the next element from the backend. + * Will try to skip over errors so that the next call might be able to return a value. 
+ * + * @note Call until std::nullopt is returned + * @return + * std::nullopt: if there is no next element (eof) + * expected Quad: if there was a next element and it could be parsed + * unexpected ParsingError: if there was a next element but it could not be parsed + */ + [[nodiscard]] virtual std::optional> next() = 0; + [[nodiscard]] virtual uint64_t current_line() const noexcept = 0; + [[nodiscard]] virtual uint64_t current_column() const noexcept = 0; + + Impl() = default; + Impl(Impl const &) = delete; + Impl(Impl&&) = delete; + Impl &operator=(Impl const &) = delete; + Impl &operator=(Impl &&) = delete; + }; + + struct ImplSerd; + struct ImplXML; std::unique_ptr impl; std::optional> cur; - public: /** * Constructs a IStreamQuadIterator from a C-like io api. That is something similar to @@ -91,6 +123,7 @@ struct IStreamQuadIterator { IStreamQuadIterator(void *stream, ReadFunc read, ErrorFunc error, + EOFFunc eof, flags_type flags = ParsingFlags::none(), state_type *initial_state = nullptr); diff --git a/src/rdf4cpp/parser/ParsingFlags.hpp b/src/rdf4cpp/parser/ParsingFlags.hpp index 7f8faaa76..6da05a64e 100644 --- a/src/rdf4cpp/parser/ParsingFlags.hpp +++ b/src/rdf4cpp/parser/ParsingFlags.hpp @@ -21,7 +21,9 @@ enum struct ParsingFlag : uint8_t { NTriples = 0b01 << 4, NQuads = 0b10 << 4, TriG = 0b11 << 4, + RdfXml = 0b100 << 4, }; +constexpr uint8_t ParsingFlag_SyntaxMask = 0b111 << 4; struct ParsingFlags { private: @@ -67,7 +69,7 @@ struct ParsingFlags { * @return the syntax ParsingFlag contained in this ParsingFlags. 
(Turtle if not specified) */ [[nodiscard]] constexpr ParsingFlag get_syntax() const noexcept { - return static_cast(flags & static_cast(ParsingFlag::TriG)); // TriG is 11, so it can double as a mask + return static_cast(flags & static_cast(ParsingFlag_SyntaxMask)); } [[nodiscard]] constexpr bool syntax_allows_prefixes() const noexcept { diff --git a/src/rdf4cpp/parser/RDFFileParser.cpp b/src/rdf4cpp/parser/RDFFileParser.cpp index 9c70d66c7..055a2db0e 100644 --- a/src/rdf4cpp/parser/RDFFileParser.cpp +++ b/src/rdf4cpp/parser/RDFFileParser.cpp @@ -30,7 +30,7 @@ RDFFileParser::iterator::iterator(FILE *&&stream, state_type *state) : stream_(stream), iter_(std::make_unique(stream_, reinterpret_cast(&fread), reinterpret_cast(&ferror), - flags, state)) { + reinterpret_cast(feof), flags, state)) { } RDFFileParser::iterator::~iterator() noexcept { fclose(stream_); diff --git a/src/rdf4cpp/util/CharMatcher.hpp b/src/rdf4cpp/util/CharMatcher.hpp index 2ce19c939..d178bf3ab 100644 --- a/src/rdf4cpp/util/CharMatcher.hpp +++ b/src/rdf4cpp/util/CharMatcher.hpp @@ -327,6 +327,61 @@ struct PNChars_UnicodePartMatcher { */ constexpr auto PNCharsMatcher = ASCIINumMatcher{} | ASCIIPatternMatcher{"-"} | PNCharsUMatcher | PNChars_UnicodePartMatcher{}; +namespace xml { + /** + * Matches the unicode part (the characters listed as numbers) of NCNameStartChar of the XML specification + */ + struct NCNameStartChar_UnicodePartMatcher { + [[nodiscard]] static constexpr bool match(int c) noexcept { + return (c >= 0xC0 && c <= 0xD6) || + (c >= 0xD8 && c <= 0xF6) || + (c >= 0xF8 && c <= 0x2FF) || + (c >= 0x370 && c <= 0x37D) || + (c >= 0x37F && c <= 0x1FFF) || + (c >= 0x200C && c <= 0x200D) || + (c >= 0x2070 && c <= 0x218F) || + (c >= 0x2C00 && c <= 0x2FEF) || + (c >= 0x3001 && c <= 0xD7FF) || + (c >= 0xF900 && c <= 0xFDCF) || + (c >= 0xFDF0 && c <= 0xFFFD) || + (c >= 0x10000 && c <= 0xEFFFF); + } + + static constexpr size_t simd_range_num = 0; + static constexpr bool fail_if_unicode = false; + 
[[nodiscard]] static consteval std::array simd_ranges() noexcept { + return {}; + } + [[nodiscard]] static consteval auto simd_singles() noexcept { + return datatypes::registry::util::ConstexprString(""); + } + }; + + + /** + * Matches the unicode part (the characters listed as numbers) of NCNameChar of the XML specification + */ + struct NCNameChar_UnicodePartMatcher { + [[nodiscard]] static constexpr bool match(int c) noexcept { + return c == 0xB7 || + (c >= 0x0300 && c <= 0x036F) || + (c >= 0x203F && c <= 0x2040); + } + + static constexpr size_t simd_range_num = 0; + static constexpr bool fail_if_unicode = false; + [[nodiscard]] static consteval std::array simd_ranges() noexcept { + return {}; + } + [[nodiscard]] static consteval auto simd_singles() noexcept { + return datatypes::registry::util::ConstexprString(""); + } + }; + + constexpr auto NCNameStartChar = ASCIIAlphaMatcher{} | ASCIIPatternMatcher{"_"} | NCNameStartChar_UnicodePartMatcher{}; + constexpr auto NCNameChar = ASCIIAlphaMatcher{} | ASCIINumMatcher{} | ASCIIPatternMatcher{"_-."} | NCNameStartChar_UnicodePartMatcher{} | NCNameChar_UnicodePartMatcher{}; +} + /** * iterates over s and tries to match all in m. * attempts to do an ASCII SIMD match first, if that does not decide the matching, decodes the utf-8 and matches char by char. 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d00dca115..2c38a3cd5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,6 +2,7 @@ set(CMAKE_CXX_STANDARD 20) find_package(doctest REQUIRED) find_package(nanobench REQUIRED) +find_package(CURL REQUIRED) # add the executable for all tests add_executable(tests_Variable query/tests_Variable.cpp) @@ -360,6 +361,14 @@ target_link_libraries(tests_Anonymizer rdf4cpp) add_test(NAME tests_Anonymizer COMMAND tests_Anonymizer) +add_executable(tests_XMLParser parser/tests_XMLParser.cpp) +target_link_libraries(tests_XMLParser + doctest::doctest + rdf4cpp + CURL::libcurl +) +add_test(NAME tests_XMLParser COMMAND tests_XMLParser) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/test_swdf/swdf.nt") file(DOWNLOAD "https://files.tentris.dev/swdf.zip" "${CMAKE_CURRENT_BINARY_DIR}/test_swdf/swdf.zip") execute_process(COMMAND unzip "${CMAKE_CURRENT_BINARY_DIR}/test_swdf/swdf.zip" -d "${CMAKE_CURRENT_BINARY_DIR}/test_swdf") diff --git a/tests/bench_SerDe.cpp b/tests/bench_SerDe.cpp index d32d8bbef..a221b5145 100644 --- a/tests/bench_SerDe.cpp +++ b/tests/bench_SerDe.cpp @@ -27,6 +27,7 @@ void deserialize(std::filesystem::path const &in_path, Dataset &ds, storage::Dyn parser::IStreamQuadIterator qit{in_file, reinterpret_cast(&fread), reinterpret_cast(&ferror), + reinterpret_cast(&feof), parser::ParsingFlags::none(), &state}; diff --git a/tests/parser/tests_IStreamQuadIterator.cpp b/tests/parser/tests_IStreamQuadIterator.cpp index 7c705cd4c..8df3754f8 100644 --- a/tests/parser/tests_IStreamQuadIterator.cpp +++ b/tests/parser/tests_IStreamQuadIterator.cpp @@ -404,7 +404,7 @@ TEST_SUITE("IStreamQuadIterator") { } auto *f = fopen(path, "r"); - for (IStreamQuadIterator qit{f, reinterpret_cast(fread), reinterpret_cast(ferror)}; qit != std::default_sentinel; ++qit) { + for (IStreamQuadIterator qit{f, reinterpret_cast(fread), reinterpret_cast(ferror), reinterpret_cast(feof)}; qit != std::default_sentinel; ++qit) { 
FAIL("not empty"); } diff --git a/tests/parser/tests_XMLParser.cpp b/tests/parser/tests_XMLParser.cpp new file mode 100644 index 000000000..e69b20c45 --- /dev/null +++ b/tests/parser/tests_XMLParser.cpp @@ -0,0 +1,447 @@ +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include +#include + +#include +#include +#include + +using namespace rdf4cpp; +using namespace rdf4cpp::parser; + +/* Smoke test: parses one inline RDF/XML document with ParsingFlag::RdfXml and steps through the iterator entry by entry, checking each quad in the order it is produced. Two entries are expected to be errors (a BadLiteral and a BadIri); the checks that follow each of them show that iteration continues after an error. The final check expects the iterator to be exhausted. */ TEST_CASE("sanity test") { + std::stringstream str{R"( + + example + 42 + not a number + + + true + + + other example + + + + + blank example + + + + + blank example 2 + + + + + abc def + +)"}; + + IStreamQuadIterator it{str, ParsingFlag::RdfXml}; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")); + CHECK(it->value().object() == IRI::make("https://www.example2.com/type")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/title")); + CHECK(it->value().object() == Literal::make_simple("example")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/cost")); + CHECK(it->value().object() == Literal::make_typed_from_value(42)); + ++it; + /* expected error entry: a literal that is not valid for its datatype */ CHECK(it != std::default_sentinel); + CHECK(!it->has_value()); + CHECK(it->error().error_type == ParsingError::Type::BadLiteral); + CHECK(it->error().message == "http://www.w3.org/2001/XMLSchema#int parsing error: found n, invalid for datatype"); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == 
IRI::make("https://www.example.com/author")); + CHECK(it->value().object() == IRI::make("https://www.example2.com")); + ++it; + /* expected error entry: an IRI with an invalid scheme */ CHECK(it != std::default_sentinel); + CHECK(!it->has_value()); + CHECK(it->error().error_type == ParsingError::Type::BadIri); + CHECK(it->error().message == "htt?ps://example: InvalidScheme"); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/released")); + CHECK(it->value().object() == Literal::make_typed_from_value(true)); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/recommended")); + CHECK(it->value().object() == IRI::make("https://www.other_example.com")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.other_example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/title")); + CHECK(it->value().object() == Literal::make_simple("other example")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/recommended")); + CHECK(it->value().object().is_blank_node()); + /* keep the blank node: the next quad must use it as its subject */ auto bn = it->value().object(); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == bn); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/title")); + CHECK(it->value().object() == Literal::make_simple("blank example")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == 
IRI::make("https://www.example.com/recommended")); + CHECK(it->value().object().is_blank_node()); + CHECK(it->value().object() != bn); + auto bn2 = it->value().object(); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == bn2); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/title")); + CHECK(it->value().object() == Literal::make_simple("blank example 2")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/coll")); + CHECK(it->value().object() == IRI::make("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/a")); + CHECK(it->value().object() == Literal::make_typed(" ", IRI::make("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))); + ++it; + CHECK(it != std::default_sentinel); + CHECK(it->has_value()); + CHECK(it->value().subject() == IRI::make("https://www.example.com")); + CHECK(it->value().predicate() == IRI::make("https://www.example.com/a")); + CHECK(it->value().object() == Literal::make_typed("abc def", IRI::make("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))); + ++it; + CHECK(it == std::default_sentinel); +} + +/* Positive conformance check. Parses xml_str as RDF/XML (with base_iri set as the base of the parser state) and nt_str as N-Triples, fails on the first parse error in either input, requires both to yield the same number of results, sorts both result lists and then compares them position by position. Blank nodes are not compared by label: the first time an N-Triples blank node is seen it is bound to the RDF/XML blank node at the same position (bn_map) and every later occurrence must resolve to that same node. */ void xml_test_positive(std::string xml_str, std::string nt_str, std::string_view base_iri) { + CAPTURE(base_iri); + + IStreamQuadIterator::state_type state{}; + CHECK(state.iri_factory.set_base(base_iri) == IRIFactoryError::Ok); + std::stringstream xml{std::move(xml_str)}; + IStreamQuadIterator xml_iter{xml, ParsingFlag::RdfXml, &state}; + std::vector xml_results; + + std::stringstream nt{std::move(nt_str)}; + IStreamQuadIterator nt_iter{nt, ParsingFlag::NTriples}; + std::vector nt_results; 
+ + /* Drains iterator i into r; any error entry fails the test with the parser's message. */ static constexpr auto read_iter_to = [](IStreamQuadIterator& i, std::vector& r) { + while (i != std::default_sentinel) { + if (!i->has_value()) { + FAIL(i->error().message); + } + r.emplace_back(i->value()); + ++i; + } + }; + read_iter_to(xml_iter, xml_results); + read_iter_to(nt_iter, nt_results); + + REQUIRE(xml_results.size() == nt_results.size()); + + /* Counts how many of subject, predicate and object are blank nodes (0 to 3). */ static constexpr auto num_blanks = [](const query::QuadPattern& p) { + size_t n = 0; + if (p.subject().is_blank_node()) { + ++n; + } + if (p.predicate().is_blank_node()) { + ++n; + } + if (p.object().is_blank_node()) { + ++n; + } + return n; + }; + /* Sorts by blank-node count first, then by the terms that are non-blank on both sides, and only as a last resort by terms that may be blank. NOTE(review): because a term is skipped whenever either side is blank, quads with blank nodes in different positions can compare cyclically (a before b, b before c, c before a), which is not the strict weak ordering std::sort requires; the fallback also orders by blank-node identity, which need not agree between the two inputs - confirm and consider a canonical ordering. */ static constexpr auto sort = [](std::vector& v) { + std::sort(v.begin(), v.end(), [](const query::QuadPattern& a, const query::QuadPattern& b) { + auto a_bl = num_blanks(a); + auto b_bl = num_blanks(b); + if (a_bl != b_bl) { + return std::less{}(a_bl, b_bl); + } + if (a.subject() != b.subject() && !a.subject().is_blank_node() && !b.subject().is_blank_node()) { + return std::less{}(a.subject(), b.subject()); + } + if (a.predicate() != b.predicate() && !a.predicate().is_blank_node() && !b.predicate().is_blank_node()) { + return std::less{}(a.predicate(), b.predicate()); + } + if (!a.object().is_blank_node() && !b.object().is_blank_node()) { + return std::less{}(a.object(), b.object()); + } + if (a.subject() != b.subject()) { + return std::less{}(a.subject(), b.subject()); + } + if (a.predicate() != b.predicate()) { + return std::less{}(a.predicate(), b.predicate()); + } + return std::less{}(a.object(), b.object()); + }); + }; + sort(xml_results); + sort(nt_results); + + /* maps each N-Triples blank node to the RDF/XML blank node it was first paired with */ std::map bn_map{}; + /* Compares one term pair: two blank nodes are checked through bn_map (recording the pairing on first sight), anything else must be equal. */ auto check = [&bn_map](Node xml, Node nt) { + if (nt.is_blank_node() && xml.is_blank_node()) { + auto i = bn_map.find(nt.as_blank_node()); + if (i != bn_map.end()) { + CHECK(xml.as_blank_node() == i->second.as_blank_node()); + } + else { + bn_map[nt.as_blank_node()] = xml.as_blank_node(); + } + } + else { + CHECK(xml == nt); + } + }; + + for (size_t i = 0; i < nt_results.size(); 
++i) { + check(xml_results.at(i).subject(), nt_results.at(i).subject()); + check(xml_results.at(i).predicate(), nt_results.at(i).predicate()); + check(xml_results.at(i).object(), nt_results.at(i).object()); + } +} + +/* Negative conformance check: parses xml_str as RDF/XML, drains the iterator and checks that at least one entry was a parse error. NOTE(review): base_iri is only CAPTUREd here and, unlike in xml_test_positive, never set on the parser state - confirm that this is intended. */ void xml_test_negative(std::string xml_str, std::string_view base_iri) { + CAPTURE(base_iri); + + + std::stringstream xml{std::move(xml_str)}; + IStreamQuadIterator xml_iter{xml, ParsingFlag::RdfXml}; + + bool had_error = false; + while (xml_iter != std::default_sentinel) { + if (xml_iter->has_value()) { + ++xml_iter; + continue; + } + had_error = true; + ++xml_iter; + } + CHECK(had_error == true); +} + + +// adopted from https://stackoverflow.com/questions/9786150/save-curl-content-result-into-a-string-in-c/9786295#9786295 +/* libcurl write callback: appends the size * nmemb received bytes to the std::string registered through CURLOPT_WRITEDATA and reports all of them as consumed. */ static size_t write_callback(void const *contents, size_t size, size_t nmemb, void *userp) { + static_cast(userp)->append(static_cast(contents), size * nmemb); + return size * nmemb; +} + +/* Downloads file_name (a path relative to the rdf-xml directory of the w3c/rdf-tests repository on raw.githubusercontent.com) with libcurl and returns the response body. REQUIRE_EQ aborts the calling test case unless the transfer finished with CURLE_OK. */ std::string remote_test_file_to_str(std::string const &file_name) { + CURL *curl = nullptr; + /* Start from a failure code so that REQUIRE_EQ below fails cleanly, instead of reading an indeterminate value, when curl_easy_init() returns nullptr. */ CURLcode curl_res = CURLE_FAILED_INIT; + auto const url = std::format("https://raw.githubusercontent.com/w3c/rdf-tests/refs/heads/main/rdf/rdf11/rdf-xml/{}", file_name); + std::string file_contents_as_str; + curl = curl_easy_init(); + if(curl) { + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &file_contents_as_str); + /* Treat HTTP status >= 400 as a failed transfer; otherwise an error page comes back as CURLE_OK and is parsed as test data, which lets xml_test_negative pass vacuously. */ curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); + /* NOTE(review): the next option turns off TLS certificate verification, so the downloaded fixtures are not authenticated - prefer removing it once the test environment has a CA bundle. */ curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); // for https + curl_res = curl_easy_perform(curl); + curl_easy_cleanup(curl); + } + REQUIRE_EQ(curl_res, CURLE_OK); + return file_contents_as_str; +} + +TEST_CASE("test cases from rdf-tests") { + // positive tests + xml_test_positive(remote_test_file_to_str("amp-in-url/test001.rdf"), remote_test_file_to_str("amp-in-url/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/amp-in-url/test001.rdf"); + xml_test_positive(remote_test_file_to_str("datatypes/test001.rdf"), 
remote_test_file_to_str("datatypes/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/datatypes/test001.rdf"); + // xml_test_positive(remote_test_file_to_str("datatypes/test002.rdf"), remote_test_file_to_str("datatypes/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/datatypes/test002.rdf"); // invalid integer + xml_test_positive(remote_test_file_to_str("rdf-charmod-literals/test001.rdf"), remote_test_file_to_str("rdf-charmod-literals/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-charmod-literals/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-charmod-uris/test001.rdf"), remote_test_file_to_str("rdf-charmod-uris/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-charmod-uris/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-charmod-uris/test002.rdf"), remote_test_file_to_str("rdf-charmod-uris/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-charmod-uris/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test001.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test002.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test003.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test003.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test004.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test004.nt"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test004.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test006.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test006.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test007.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test007.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test007.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-containers-syntax-vs-schema/test008.rdf"), remote_test_file_to_str("rdf-containers-syntax-vs-schema/test008.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/test008.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-element-not-mandatory/test001.rdf"), remote_test_file_to_str("rdf-element-not-mandatory/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-element-not-mandatory/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-node-element/test001.rdf"), remote_test_file_to_str("rdf-node-element/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-node-element/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0001.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0001.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0003.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0003.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0004.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0004.nt"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0004.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0005.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0005.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0006.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0006.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0009.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0009.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0009.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0010.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0010.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0010.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0011.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0011.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0011.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0012.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0012.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0012.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0013.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0013.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0013.rdf"); + xml_test_positive(remote_test_file_to_str("rdf-ns-prefix-confusion/test0014.rdf"), remote_test_file_to_str("rdf-ns-prefix-confusion/test0014.nt"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-ns-prefix-confusion/test0014.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-difference-between-ID-and-about/test1.rdf"), remote_test_file_to_str("rdfms-difference-between-ID-and-about/test1.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-difference-between-ID-and-about/test1.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-difference-between-ID-and-about/test2.rdf"), remote_test_file_to_str("rdfms-difference-between-ID-and-about/test2.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-difference-between-ID-and-about/test2.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-difference-between-ID-and-about/test3.rdf"), remote_test_file_to_str("rdfms-difference-between-ID-and-about/test3.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-difference-between-ID-and-about/test3.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-duplicate-member-props/test001.rdf"), remote_test_file_to_str("rdfms-duplicate-member-props/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-duplicate-member-props/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test001.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test002.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test004.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test004.rdf"); + 
xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test005.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test005.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test006.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test006.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test007.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test007.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test007.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test008.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test008.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test008.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test010.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test010.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test010.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test011.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test011.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test011.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test012.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test012.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test012.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test013.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test013.nt"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test013.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test014.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test014.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test014.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test015.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test015.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test015.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test016.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test016.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test016.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-empty-property-elements/test017.rdf"), remote_test_file_to_str("rdfms-empty-property-elements/test017.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/test017.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-identity-anon-resources/test001.rdf"), remote_test_file_to_str("rdfms-identity-anon-resources/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-identity-anon-resources/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-identity-anon-resources/test002.rdf"), remote_test_file_to_str("rdfms-identity-anon-resources/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-identity-anon-resources/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-identity-anon-resources/test003.rdf"), remote_test_file_to_str("rdfms-identity-anon-resources/test003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-identity-anon-resources/test003.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-identity-anon-resources/test004.rdf"), 
remote_test_file_to_str("rdfms-identity-anon-resources/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-identity-anon-resources/test004.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-identity-anon-resources/test005.rdf"), remote_test_file_to_str("rdfms-identity-anon-resources/test005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-identity-anon-resources/test005.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-not-id-and-resource-attr/test001.rdf"), remote_test_file_to_str("rdfms-not-id-and-resource-attr/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-not-id-and-resource-attr/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-not-id-and-resource-attr/test002.rdf"), remote_test_file_to_str("rdfms-not-id-and-resource-attr/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-not-id-and-resource-attr/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-not-id-and-resource-attr/test004.rdf"), remote_test_file_to_str("rdfms-not-id-and-resource-attr/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-not-id-and-resource-attr/test004.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-not-id-and-resource-attr/test005.rdf"), remote_test_file_to_str("rdfms-not-id-and-resource-attr/test005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-not-id-and-resource-attr/test005.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-para196/test001.rdf"), remote_test_file_to_str("rdfms-para196/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-para196/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-001.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-002.rdf"), 
remote_test_file_to_str("rdfms-rdf-names-use/test-002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-003.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-003.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-004.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-004.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-005.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-005.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-006.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-006.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-007.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-007.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-007.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-008.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-008.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-008.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-009.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-009.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-009.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-010.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-010.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-010.rdf"); + 
xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-011.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-011.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-011.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-012.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-012.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-012.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-013.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-013.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-013.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-014.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-014.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-014.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-015.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-015.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-015.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-016.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-016.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-016.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-017.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-017.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-017.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-018.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-018.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-018.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-019.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-019.nt"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-019.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-020.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-020.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-020.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-021.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-021.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-021.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-022.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-022.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-022.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-023.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-023.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-023.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-024.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-024.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-024.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-025.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-025.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-025.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-026.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-026.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-026.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-027.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-027.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-027.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-028.rdf"), 
remote_test_file_to_str("rdfms-rdf-names-use/test-028.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-028.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-029.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-029.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-029.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-030.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-030.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-030.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-031.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-031.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-031.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-032.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-032.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-032.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-033.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-033.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-033.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-034.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-034.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-034.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-035.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-035.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-035.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-036.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-036.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-036.rdf"); + 
xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/test-037.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/test-037.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/test-037.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/warn-001.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/warn-001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/warn-001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/warn-002.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/warn-002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/warn-002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-rdf-names-use/warn-003.rdf"), remote_test_file_to_str("rdfms-rdf-names-use/warn-003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/warn-003.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-reification-required/test001.rdf"), remote_test_file_to_str("rdfms-reification-required/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-reification-required/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-reification-required/test002.rdf"), remote_test_file_to_str("rdfms-reification-required/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-reification-required/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-seq-representation/test001.rdf"), remote_test_file_to_str("rdfms-seq-representation/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-seq-representation/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-seq-representation/test002.rdf"), remote_test_file_to_str("rdfms-seq-representation/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-seq-representation/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-syntax-incomplete/test001.rdf"), 
remote_test_file_to_str("rdfms-syntax-incomplete/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-syntax-incomplete/test002.rdf"), remote_test_file_to_str("rdfms-syntax-incomplete/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/test002.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-syntax-incomplete/test003.rdf"), remote_test_file_to_str("rdfms-syntax-incomplete/test003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/test003.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-syntax-incomplete/test004.rdf"), remote_test_file_to_str("rdfms-syntax-incomplete/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/test004.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-uri-substructure/test001.rdf"), remote_test_file_to_str("rdfms-uri-substructure/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-uri-substructure/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-xmllang/test003.rdf"), remote_test_file_to_str("rdfms-xmllang/test003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-xmllang/test003.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-xmllang/test004.rdf"), remote_test_file_to_str("rdfms-xmllang/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-xmllang/test004.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-xmllang/test005.rdf"), remote_test_file_to_str("rdfms-xmllang/test005.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-xmllang/test005.rdf"); + xml_test_positive(remote_test_file_to_str("rdfms-xmllang/test006.rdf"), remote_test_file_to_str("rdfms-xmllang/test006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-xmllang/test006.rdf"); + 
xml_test_positive(remote_test_file_to_str("rdfs-domain-and-range/test001.rdf"), remote_test_file_to_str("rdfs-domain-and-range/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfs-domain-and-range/test001.rdf"); + xml_test_positive(remote_test_file_to_str("rdfs-domain-and-range/test002.rdf"), remote_test_file_to_str("rdfs-domain-and-range/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfs-domain-and-range/test002.rdf"); + xml_test_positive(remote_test_file_to_str("unrecognised-xml-attributes/test001.rdf"), remote_test_file_to_str("unrecognised-xml-attributes/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/unrecognised-xml-attributes/test001.rdf"); + xml_test_positive(remote_test_file_to_str("unrecognised-xml-attributes/test002.rdf"), remote_test_file_to_str("unrecognised-xml-attributes/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/unrecognised-xml-attributes/test002.rdf"); + // xml_test_positive(remote_test_file_to_str("xml-canon/test001.rdf"), remote_test_file_to_str("xml-canon/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xml-canon/test001.rdf"); // XMLLiteral is not exactly as the spec defines + // xml_test_positive(remote_test_file_to_str("xml-canon/test002.rdf"), remote_test_file_to_str("xml-canon/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xml-canon/test002.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test001.rdf"), remote_test_file_to_str("xmlbase/test001.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test001.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test002.rdf"), remote_test_file_to_str("xmlbase/test002.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test002.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test003.rdf"), remote_test_file_to_str("xmlbase/test003.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test003.rdf"); + 
xml_test_positive(remote_test_file_to_str("xmlbase/test004.rdf"), remote_test_file_to_str("xmlbase/test004.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test004.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test006.rdf"), remote_test_file_to_str("xmlbase/test006.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test006.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test007.rdf"), remote_test_file_to_str("xmlbase/test007.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test007.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test008.rdf"), remote_test_file_to_str("xmlbase/test008.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test008.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test009.rdf"), remote_test_file_to_str("xmlbase/test009.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test009.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test010.rdf"), remote_test_file_to_str("xmlbase/test010.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test010.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test011.rdf"), remote_test_file_to_str("xmlbase/test011.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test011.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test013.rdf"), remote_test_file_to_str("xmlbase/test013.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test013.rdf"); + xml_test_positive(remote_test_file_to_str("xmlbase/test014.rdf"), remote_test_file_to_str("xmlbase/test014.nt"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/xmlbase/test014.rdf"); + // negative tests + xml_test_negative(remote_test_file_to_str("rdf-containers-syntax-vs-schema/error001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/error001.rdf"); + 
xml_test_negative(remote_test_file_to_str("rdf-containers-syntax-vs-schema/error002.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdf-containers-syntax-vs-schema/error002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-abouteach/error001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-abouteach/error001.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-abouteach/error002.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-abouteach/error002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-difference-between-ID-and-about/error1.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-difference-between-ID-and-about/error1.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-empty-property-elements/error001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/error001.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-empty-property-elements/error002.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-empty-property-elements/error002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error001.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error002.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error003.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error003.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error004.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error004.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error005.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error005.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error006.rdf"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error006.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-id/error007.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-id/error007.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-001.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-002.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-003.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-003.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-004.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-004.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-005.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-005.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-006.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-006.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-007.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-007.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-008.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-008.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-009.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-009.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-010.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-010.rdf"); + 
xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-011.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-011.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-012.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-012.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-013.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-013.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-014.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-014.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-015.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-015.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-016.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-016.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-017.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-017.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-018.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-018.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-019.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-019.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-rdf-names-use/error-020.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-rdf-names-use/error-020.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error001.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error001.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error002.rdf"), 
"https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error002.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error003.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error003.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error004.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error004.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error005.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error005.rdf"); + xml_test_negative(remote_test_file_to_str("rdfms-syntax-incomplete/error006.rdf"), "https://w3c.github.io/rdf-tests/rdf/rdf11/rdf-xml/rdfms-syntax-incomplete/error006.rdf"); +} From f6b22963c44672ef1493c5b5759e649ff30235d2 Mon Sep 17 00:00:00 2001 From: Liss Heidrich <31625940+liss-h@users.noreply.github.com> Date: Tue, 27 Jan 2026 08:49:38 +0100 Subject: [PATCH 2/2] version bump --- CMakeLists.txt | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de45ed9fb..7f8cfb4a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.22) -project(rdf4cpp VERSION 0.1.12) +project(rdf4cpp VERSION 0.1.13) set(POBR_VERSION 3) # Persisted Object Binary Representation include(cmake/boilerplate_init.cmake) diff --git a/README.md b/README.md index 3f6f572f2..50f5cc4a8 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ To use _rdf4cpp_, add it to your `conanfile.txt`: ``` [requires] -rdf4cpp/0.1.12 +rdf4cpp/0.1.13 ``` For getting started how to use rdf4cpp, check out the [examples](./examples) directory and refer to our documentation.