diff --git a/src/rdf/turtle.js b/src/rdf/turtle.js index e5d1b89..e0b81e3 100644 --- a/src/rdf/turtle.js +++ b/src/rdf/turtle.js @@ -189,10 +189,26 @@ function jsonLdToQuads(jsonLd, baseUri) { const context = mergedContext; - for (const node of nodes) { + // BFS over nodes so that nested node objects (e.g. CID `service[]` entries + // with their own @id/@type/properties) are emitted as their own subjects + // rather than collapsed to a bare URI reference. + // + // Two notes on the traversal shape: + // - Index-based iteration avoids O(n) array.shift() per step. + // - We deliberately do NOT skip re-emission when the same @id appears + // twice. Duplicate triples are harmless in RDF, and documents built + // from PATCH merges or multi-doc inputs can legitimately carry + // multiple objects for the same subject. The `enqueuedNested` set + // (by object identity) is used only to prevent the same nested + // object from being enqueued twice — i.e. cycle protection, not + // emission deduplication. + const enqueuedNested = new WeakSet(); + const queue = [...nodes]; + for (let i = 0; i < queue.length; i++) { + const node = queue[i]; if (!node['@id']) continue; - const subjectUri = resolveUri(node['@id'], baseUri); + const subject = subjectUri.startsWith('_:') ? blankNode(subjectUri.slice(2)) : namedNode(subjectUri); @@ -227,6 +243,20 @@ function jsonLdToQuads(jsonLd, baseUri) { if (object) { quads.push(quad(subject, predicate, object)); } + // If v is a nested node (object with @id and at least one non-@value + // own property beyond @id), enqueue it so its triples are also + // emitted. Object-identity tracking (WeakSet) prevents the same + // nested object from being enqueued twice, which would otherwise + // loop for graphs that reuse an object reference (cycles). 
+ if (v && typeof v === 'object' && !Array.isArray(v) && + v['@id'] && v['@value'] === undefined && + !enqueuedNested.has(v)) { + const hasOwnClaims = Object.keys(v).some(k => k !== '@id'); + if (hasOwnClaims) { + enqueuedNested.add(v); + queue.push(v); + } + } } } } @@ -378,9 +408,14 @@ function resolveUri(uri, baseUri) { } /** - * Expand prefixed URI using context + * Expand prefixed URI using context. + * + * The `seen` parameter guards against cycles in user-supplied contexts + * (e.g., `foo -> bar -> foo`). Without this a request carrying a malicious + * JSON-LD context could cause unbounded recursion / stack overflow on the + * server during conneg conversion — a remote DoS. */ -function expandUri(uri, context) { +function expandUri(uri, context, seen) { if (uri.includes('://')) { return uri; } @@ -388,19 +423,29 @@ function expandUri(uri, context) { if (uri.includes(':')) { const [prefix, local] = uri.split(':', 2); const ns = context[prefix] || COMMON_PREFIXES[prefix]; - if (ns) { + // Only concat when the prefix maps to a string namespace. A user-supplied + // context can legally define a prefix-looking key as a term-definition + // object; string-concatenating that would produce "[object Object]…". + if (typeof ns === 'string') { return ns + local; } } - // Check if it's a term in context + // Check if it's a term in context. A context value can itself be a + // CURIE (`cid:service`) that still needs prefix expansion, so recurse — + // but only when we haven't already followed this term on the current + // expansion chain. if (context[uri]) { + const chain = seen || new Set(); + if (chain.has(uri)) return uri; + chain.add(uri); const expansion = context[uri]; if (typeof expansion === 'string') { - return expansion; + return expansion === uri ? uri : expandUri(expansion, context, chain); } if (expansion['@id']) { - return expansion['@id']; + const id = expansion['@id']; + return id === uri ? 
uri : expandUri(id, context, chain); } } diff --git a/src/webid/profile.js b/src/webid/profile.js index a9e79cf..0d987e0 100644 --- a/src/webid/profile.js +++ b/src/webid/profile.js @@ -12,6 +12,8 @@ const SOLID = 'http://www.w3.org/ns/solid/terms#'; const SCHEMA = 'http://schema.org/'; const LDP = 'http://www.w3.org/ns/ldp#'; const PIM = 'http://www.w3.org/ns/pim/space#'; +const CID = 'https://www.w3.org/ns/cid/v1#'; +const LWS = 'https://www.w3.org/ns/lws#'; /** * Generate JSON-LD data for a WebID profile @@ -24,6 +26,9 @@ const PIM = 'http://www.w3.org/ns/pim/space#'; */ export function generateProfileJsonLd({ webId, name, podUri, issuer }) { const pod = podUri.endsWith('/') ? podUri : podUri + '/'; + // Document URL is the WebID without its fragment; service entries use + // fragment ids resolved against it. + const docUrl = webId.split('#')[0]; return { '@context': { @@ -32,6 +37,8 @@ export function generateProfileJsonLd({ webId, name, podUri, issuer }) { 'schema': SCHEMA, 'pim': PIM, 'ldp': LDP, + 'cid': CID, + 'lws': LWS, 'inbox': { '@id': 'ldp:inbox', '@type': '@id' }, 'storage': { '@id': 'pim:storage', '@type': '@id' }, 'oidcIssuer': { '@id': 'solid:oidcIssuer', '@type': '@id' }, @@ -39,7 +46,9 @@ export function generateProfileJsonLd({ webId, name, podUri, issuer }) { 'publicTypeIndex': { '@id': 'solid:publicTypeIndex', '@type': '@id' }, 'privateTypeIndex': { '@id': 'solid:privateTypeIndex', '@type': '@id' }, 'isPrimaryTopicOf': { '@id': 'foaf:isPrimaryTopicOf', '@type': '@id' }, - 'mainEntityOfPage': { '@id': 'schema:mainEntityOfPage', '@type': '@id' } + 'mainEntityOfPage': { '@id': 'schema:mainEntityOfPage', '@type': '@id' }, + 'service': { '@id': 'cid:service', '@container': '@set' }, + 'serviceEndpoint': { '@id': 'cid:serviceEndpoint', '@type': '@id' } }, '@id': webId, '@type': ['foaf:Person', 'schema:Person'], @@ -51,7 +60,17 @@ export function generateProfileJsonLd({ webId, name, podUri, issuer }) { 'oidcIssuer': issuer, 'preferencesFile': 
`${pod}settings/prefs.jsonld`, 'publicTypeIndex': `${pod}settings/publicTypeIndex.jsonld`, - 'privateTypeIndex': `${pod}settings/privateTypeIndex.jsonld` + 'privateTypeIndex': `${pod}settings/privateTypeIndex.jsonld`, + // LWS 1.0 Controlled Identifier service entry — mirrors `oidcIssuer` so + // LWS-aware verifiers can establish trust. Additive; the legacy + // `solid:oidcIssuer` predicate stays for existing Solid clients. + 'service': [ + { + '@id': `${docUrl}#oidc`, + '@type': 'lws:OpenIdProvider', + 'serviceEndpoint': issuer + } + ] }; } diff --git a/test/turtle.test.js b/test/turtle.test.js new file mode 100644 index 0000000..c29ba2b --- /dev/null +++ b/test/turtle.test.js @@ -0,0 +1,104 @@ +/** + * Direct unit tests for the JSON-LD → Turtle converter. + * + * The focus is on regression coverage for properties that would otherwise + * be easy to regress silently: + * - cycle-safety in expandUri (DoS guard — a malicious context must not + * cause unbounded recursion / stack overflow) + * - duplicate @id across top-level docs must NOT suppress emission + * (the visited-set refactor previously dropped data) + * - cyclical nested node references must not hang the BFS + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert'; +import { fromJsonLd } from '../src/rdf/conneg.js'; + +describe('turtle converter — unit (#320 follow-ups)', () => { + it('expandUri does not recurse forever on a cyclic context (a → b → a)', async () => { + const doc = { + '@context': { + // Pathological: each term points at another term via CURIE, forming a loop. + 'a': { '@id': 'b:x' }, + 'b': { '@id': 'a:y' } + }, + '@id': 'https://example.test/s', + 'a': 'hello' + }; + // The converter should finish — not stack-overflow — regardless of what + // the output happens to look like. We only assert it completes with a + // string result. 
+ const { content } = await fromJsonLd(doc, 'text/turtle', 'https://example.test/', true); + assert.ok(typeof content === 'string'); + }); + + it('expandUri does not recurse forever on a self-loop (a → a)', async () => { + const doc = { + '@context': { + 'selfy': 'selfy' + }, + '@id': 'https://example.test/s', + 'selfy': 'hello' + }; + const { content } = await fromJsonLd(doc, 'text/turtle', 'https://example.test/', true); + assert.ok(typeof content === 'string'); + }); + + it('duplicate top-level @id is not silently dropped', async () => { + // Two docs describing the same subject — both claims must survive. + // (Previously the visited-set in the BFS skipped the second pass.) + const docs = [ + { + '@context': { 'foaf': 'http://xmlns.com/foaf/0.1/' }, + '@id': 'https://example.test/alice', + 'foaf:name': 'Alice' + }, + { + '@context': { 'foaf': 'http://xmlns.com/foaf/0.1/' }, + '@id': 'https://example.test/alice', + 'foaf:age': 30 + } + ]; + const { content } = await fromJsonLd(docs, 'text/turtle', 'https://example.test/', true); + assert.ok(content.includes('Alice'), `Turtle should contain the name claim, got:\n${content}`); + assert.ok(/30|"30"/.test(content), `Turtle should contain the age claim, got:\n${content}`); + }); + + it('prefix-looking context key defined as an object is not string-concatenated', async () => { + // A user-supplied context can legally define a prefix-looking key as a + // term-definition object (not a namespace string). The converter must + // not treat it as a namespace — string-concatenating the object would + // produce invalid IRIs like "[object Object]foo". + const doc = { + '@context': { + // `bogus` is defined as a term object, not a namespace string. + 'bogus': { '@id': 'https://example.test/ns#bogus' } + }, + '@id': 'https://example.test/s', + // This looks like a CURIE `bogus:foo` but `bogus` is not a valid + // namespace — the converter should leave it alone. 
+ 'bogus:foo': 'hello' + }; + const { content } = await fromJsonLd(doc, 'text/turtle', 'https://example.test/', true); + assert.ok(typeof content === 'string'); + assert.ok(!content.includes('[object Object]'), + `Turtle output must not contain object-stringification, got:\n${content}`); + }); + + it('cyclical nested node reference does not hang', async () => { + // Two nested nodes reference each other. BFS must not loop. + const a = { '@id': 'https://example.test/a', 'ex:knows': null }; + const b = { '@id': 'https://example.test/b', 'ex:knows': a }; + a['ex:knows'] = b; + + const doc = { + '@context': { 'ex': 'https://example.test/ns#' }, + '@id': 'https://example.test/root', + 'ex:knows': a + }; + const { content } = await fromJsonLd(doc, 'text/turtle', 'https://example.test/', true); + assert.ok(typeof content === 'string'); + assert.ok(content.includes('https://example.test/a'), 'node a should appear'); + assert.ok(content.includes('https://example.test/b'), 'node b should appear'); + }); +}); diff --git a/test/webid.test.js b/test/webid.test.js index 55b41c8..4d36233 100644 --- a/test/webid.test.js +++ b/test/webid.test.js @@ -98,6 +98,41 @@ describe('WebID Profile', () => { // Empty string is a relative URI reference to the document itself (JSON-LD) assert.strictEqual(jsonLd['isPrimaryTopicOf'], '', 'isPrimaryTopicOf should be "" (self)'); }); + + // LWS 1.0 Controlled Identifier alignment (#320). + // These assertions live alongside the WebID predicate assertions — both + // must continue to hold since the profile is dual-write. 
+ it('should emit a CID service[] with an lws:OpenIdProvider entry', async () => { + const res = await request(profilePath); + const jsonLd = await res.json(); + assert.ok(Array.isArray(jsonLd.service), 'profile should have a service array'); + const oidc = jsonLd.service.find((s) => s['@type'] === 'lws:OpenIdProvider'); + assert.ok(oidc, 'service[] must include an lws:OpenIdProvider entry'); + }); + + it('lws:OpenIdProvider service.serviceEndpoint mirrors oidcIssuer', async () => { + const res = await request(profilePath); + const jsonLd = await res.json(); + assert.ok(Array.isArray(jsonLd.service), 'profile should have a service array'); + const oidc = jsonLd.service.find((s) => s['@type'] === 'lws:OpenIdProvider'); + assert.ok(oidc, 'service[] must include an lws:OpenIdProvider entry'); + assert.strictEqual( + oidc.serviceEndpoint, + jsonLd.oidcIssuer, + 'serviceEndpoint must equal the existing oidcIssuer value' + ); + }); + + it('lws:OpenIdProvider service.id is a fragment on the profile document', async () => { + const res = await request(profilePath); + const jsonLd = await res.json(); + assert.ok(Array.isArray(jsonLd.service), 'profile should have a service array'); + const oidc = jsonLd.service.find((s) => s['@type'] === 'lws:OpenIdProvider'); + assert.ok(oidc, 'service[] must include an lws:OpenIdProvider entry'); + const docUrl = jsonLd['@id'].split('#')[0]; + assert.strictEqual(oidc['@id'], `${docUrl}#oidc`, + 'service entry @id should be `#oidc`'); + }); }); describe('WebID Resolution', () => { @@ -119,3 +154,49 @@ describe('WebID Profile', () => { }); }); }); + +// With conneg enabled the profile is converted to Turtle on demand. The +// CID service[] must survive that conversion — LWS verifiers that ask for +// Turtle need to see the nested service node's type and serviceEndpoint, +// not just a bare URI reference to it. 
+describe('WebID Profile — Turtle conneg (#320)', () => { + before(async () => { + await startTestServer({ conneg: true }); + await createTestPod('webidturtletest'); + }); + + after(async () => { + await stopTestServer(); + }); + + it('Turtle variant includes cid:service with lws:OpenIdProvider and serviceEndpoint', async () => { + const res = await request('/webidturtletest/profile/card.jsonld', { + headers: { Accept: 'text/turtle' } + }); + assertStatus(res, 200); + assertHeaderContains(res, 'Content-Type', 'text/turtle'); + const ttl = await res.text(); + // Accept either prefixed (cid:service) or expanded full-IRI form; what matters + // is that the nested service node's type and endpoint survived the JSON-LD → + // Turtle conversion. The predicate match ends in \b because a plain substring + // test for `cid:service` would also be satisfied by `cid:serviceEndpoint`. + assert.ok( + /(?:cid:|cid\/v1#)service\b/.test(ttl), + `Turtle should reference the CID service predicate, got:\n${ttl}` + ); + assert.ok( + ttl.includes('OpenIdProvider'), + `Turtle should declare the lws:OpenIdProvider type, got:\n${ttl}` + ); + assert.ok( + ttl.includes('cid:serviceEndpoint') || ttl.includes('cid/v1#serviceEndpoint'), + `Turtle should include the cid:serviceEndpoint predicate, got:\n${ttl}` + ); + // The service entry IRI must be directly followed by a type predicate (`a` or a + // full IRI ending in `#type`), i.e. it is a subject, not just an object reference. + assert.ok( + /#oidc>\s+(?:a|<[^>]*#type>)/.test(ttl), + `Turtle should emit the service entry as a subject, got:\n${ttl}` + ); + }); +});