Skip to content

Commit 2b22da6

Browse files
committed
Add URDNA2015 normalization algorithm implementation
- Introduced `Urdna2015` class for RDF dataset normalization. - Added `IdentifierIssuer` class to manage unique identifier generation. - Updated `JsonLdApi` to support URDNA2015 algorithm selection. - Enhanced `JsonLdOptions` to include normalization algorithm options. - Expanded `NormalizeUtils` with utility methods for sorting and hashing. - Implemented methods for handling blank nodes and quad normalization.
1 parent 4a7305f commit 2b22da6

File tree

5 files changed

+936
-11
lines changed

5 files changed

+936
-11
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package com.github.jsonldjava.core;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
/**
 * An IdentifierIssuer issues unique identifiers, keeping track of any
 * previously issued identifiers. Used by the URDNA2015 normalization algorithm.
 *
 * <p>Identifiers have the form {@code prefix + counter}, where the counter
 * starts at 0 and is incremented on every newly issued identifier.
 */
public class IdentifierIssuer implements Cloneable {

    /** Text prepended to the counter to form each issued identifier. */
    private final String prefix;

    /** Number of identifiers issued so far; also the suffix of the next one. */
    private int counter;

    // Not final: clone() replaces both collections with independent copies.
    /** Maps each old identifier to the identifier that was issued for it. */
    private Map<String, String> existing;

    /** Old identifiers in the order in which they were first assigned. */
    private List<String> order;

    /**
     * Creates an issuer whose identifiers start with the given prefix.
     *
     * @param prefix the text prepended to the counter of each identifier
     */
    public IdentifierIssuer(String prefix) {
        this.prefix = prefix;
        this.counter = 0;
        this.existing = new HashMap<>();
        this.order = new ArrayList<>();
    }

    /**
     * Issues a fresh identifier that is not associated with any old
     * identifier. Equivalent to {@code getId(null)}.
     *
     * @return the newly issued identifier
     */
    public String getId() {
        return getId(null);
    }

    /**
     * Gets the new identifier for the given old identifier, where if no old
     * identifier is given a new identifier will be generated.
     *
     * @param old the old identifier to get the new identifier for (can be null)
     * @return the identifier previously issued for {@code old}, or a newly
     *         issued one if there is none or {@code old} is null
     */
    public String getId(String old) {
        if (old != null) {
            // Issued identifiers are never null, so a single lookup suffices.
            final String issued = existing.get(old);
            if (issued != null) {
                return issued;
            }
        }

        final String id = prefix + counter;
        counter += 1;

        // Only identifiers tied to an old identifier are remembered.
        if (old != null) {
            existing.put(old, id);
            order.add(old);
        }

        return id;
    }

    /**
     * Returns true if the given old identifier has already been assigned a
     * new identifier.
     *
     * @param old the old identifier to check
     * @return true if the old identifier has been assigned a new identifier,
     *         false if not
     */
    public boolean hasID(String old) {
        return existing.containsKey(old);
    }

    /**
     * Returns the old identifiers in the order in which they were assigned.
     *
     * @return the issuer's internal list (not a copy), so later assignments
     *         are visible through it
     */
    public List<String> getOrder() {
        return order;
    }

    /**
     * Returns the prefix used for every identifier issued by this instance.
     *
     * @return the identifier prefix
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * Creates a copy with the same prefix, counter and assignments. The copy
     * owns its own map and list, so issuing identifiers on one instance does
     * not affect the other.
     *
     * @return the copied issuer
     */
    @Override
    public Object clone() {
        try {
            final IdentifierIssuer cloned = (IdentifierIssuer) super.clone();
            cloned.existing = new HashMap<>(this.existing);
            cloned.order = new ArrayList<>(this.order);
            return cloned;
        } catch (CloneNotSupportedException e) {
            // Not expected: this class implements Cloneable.
            throw new RuntimeException(e);
        }
    }
}
85+

core/src/main/java/com/github/jsonldjava/core/JsonLdApi.java

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2185,15 +2185,50 @@ public RDFDataset toRDF() throws JsonLdError {
21852185
* If there was an error while normalizing.
21862186
*/
21872187
public Object normalize(Map<String, Object> dataset) throws JsonLdError {
2188+
return normalize(dataset, opts);
2189+
}
2190+
2191+
/**
2192+
* Performs RDF normalization on the given JSON-LD input.
2193+
*
2194+
* @param dataset
2195+
* the expanded JSON-LD object to normalize.
2196+
* @param options
2197+
* the JSON-LD options containing the algorithm to use.
2198+
* @return The normalized JSON-LD object
2199+
* @throws JsonLdError
2200+
* If there was an error while normalizing.
2201+
*/
2202+
public Object normalize(Map<String, Object> dataset, JsonLdOptions options) throws JsonLdError {
2203+
if (options.getAlgorithm().equals(JsonLdOptions.URGNA2012)) {
2204+
return normalizeURGN2012(dataset);
2205+
} else if (options.getAlgorithm().equals(JsonLdOptions.URDNA2015)) {
2206+
return normalizeURDNA2015(dataset, options);
2207+
}
2208+
2209+
return null;
2210+
}
2211+
2212+
/**
2213+
* Normalizes using the URGNA2012 algorithm.
2214+
*
2215+
* @param dataset
2216+
* the expanded JSON-LD object to normalize.
2217+
* @return The normalized JSON-LD object
2218+
* @throws JsonLdError
2219+
* If there was an error while normalizing.
2220+
*/
2221+
public Object normalizeURGN2012(Map<String, Object> dataset) throws JsonLdError {
21882222
// create quads and map bnodes to their associated quads
21892223
final List<Object> quads = new ArrayList<Object>();
21902224
final Map<String, Object> bnodes = newMap();
2225+
21912226
for (String graphName : dataset.keySet()) {
2192-
final List<Map<String, Object>> triples = (List<Map<String, Object>>) dataset
2193-
.get(graphName);
2227+
final List<Map<String, Object>> triples = (List<Map<String, Object>>) dataset.get(graphName);
21942228
if (JsonLdConsts.DEFAULT.equals(graphName)) {
21952229
graphName = null;
21962230
}
2231+
21972232
for (final Map<String, Object> quad : triples) {
21982233
if (graphName != null) {
21992234
if (graphName.indexOf("_:") == 0) {
@@ -2212,28 +2247,40 @@ public Object normalize(Map<String, Object> dataset) throws JsonLdError {
22122247

22132248
final String[] attrs = new String[] { "subject", "object", "name" };
22142249
for (final String attr : attrs) {
2215-
if (quad.containsKey(attr) && "blank node"
2216-
.equals(((Map<String, Object>) quad.get(attr)).get("type"))) {
2217-
final String id = (String) ((Map<String, Object>) quad.get(attr))
2218-
.get("value");
2250+
if (quad.containsKey(attr)
2251+
&& "blank node".equals(((Map<String, Object>) quad.get(attr)).get("type"))) {
2252+
final String id = (String) ((Map<String, Object>) quad.get(attr)).get("value");
22192253
if (!bnodes.containsKey(id)) {
22202254
bnodes.put(id, new LinkedHashMap<String, List<Object>>() {
22212255
{
22222256
put("quads", new ArrayList<Object>());
22232257
}
22242258
});
22252259
}
2226-
((List<Object>) ((Map<String, Object>) bnodes.get(id)).get("quads"))
2227-
.add(quad);
2260+
((List<Object>) ((Map<String, Object>) bnodes.get(id)).get("quads")).add(quad);
22282261
}
22292262
}
22302263
}
22312264
}
22322265

22332266
// mapping complete, start canonical naming
2234-
final NormalizeUtils normalizeUtils = new NormalizeUtils(quads, bnodes,
2235-
new UniqueNamer("_:c14n"), opts);
2267+
final NormalizeUtils normalizeUtils = new NormalizeUtils(quads, bnodes, new UniqueNamer("_:c14n"), opts);
22362268
return normalizeUtils.hashBlankNodes(bnodes.keySet());
22372269
}
22382270

2271+
/**
2272+
* Normalizes using the URDNA2015 algorithm.
2273+
*
2274+
* @param dataset
2275+
* the expanded JSON-LD object to normalize.
2276+
* @param options
2277+
* the JSON-LD options to use.
2278+
* @return The normalized JSON-LD object
2279+
* @throws JsonLdError
2280+
* If there was an error while normalizing.
2281+
*/
2282+
public Object normalizeURDNA2015(Map<String, Object> dataset, JsonLdOptions options) throws JsonLdError {
2283+
return new Urdna2015(dataset, options).normalize();
2284+
}
2285+
22392286
}

core/src/main/java/com/github/jsonldjava/core/JsonLdOptions.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ public class JsonLdOptions {
1818

1919
public static final boolean DEFAULT_COMPACT_ARRAYS = true;
2020

21+
// Normalization algorithm constants: the names JsonLdApi.normalize compares
// against getAlgorithm() to choose an implementation.
22+
/** Name of the URGNA2012 algorithm; the initial value of the algorithm option. */
public static final String URGNA2012 = "URGNA2012";
23+
/** Name of the URDNA2015 algorithm. */
public static final String URDNA2015 = "URDNA2015";
24+
2125
/**
2226
* Constructs an instance of JsonLdOptions using an empty base.
2327
*/
@@ -61,6 +65,7 @@ public JsonLdOptions copy() {
6165
copy.setUseRdfType(useRdfType);
6266
copy.setUseNativeTypes(useNativeTypes);
6367
copy.setProduceGeneralizedRdf(produceGeneralizedRdf);
68+
copy.setAlgorithm(algorithm);
6469
copy.format = format;
6570
copy.useNamespaces = useNamespaces;
6671
copy.outputForm = outputForm;
@@ -110,6 +115,9 @@ public JsonLdOptions copy() {
110115
Boolean useNativeTypes = false;
111116
private boolean produceGeneralizedRdf = false;
112117

118+
// Normalization algorithm option: name of the algorithm used when
// normalizing. Starts out as URGNA2012 unless setAlgorithm changes it.
119+
private String algorithm = URGNA2012;
120+
113121
public String getEmbed() {
114122
switch (this.embed) {
115123
case ALWAYS:
@@ -294,6 +302,14 @@ public void setDocumentLoader(DocumentLoader documentLoader) {
294302
this.documentLoader = documentLoader;
295303
}
296304

305+
/**
 * Returns the name of the normalization algorithm held by these options.
 *
 * @return the current algorithm name (initially {@link #URGNA2012})
 */
public String getAlgorithm() {
306+
return algorithm;
307+
}
308+
309+
/**
 * Sets the name of the normalization algorithm held by these options. The
 * value is stored as given, without validation.
 *
 * @param algorithm
 *            the algorithm name, e.g. {@link #URGNA2012} or
 *            {@link #URDNA2015}
 */
public void setAlgorithm(String algorithm) {
310+
this.algorithm = algorithm;
311+
}
312+
297313
// TODO: THE FOLLOWING ONLY EXIST SO I DON'T HAVE TO DELETE A LOT OF CODE,
298314
// REMOVE IT WHEN DONE
299315
public String format = null;

core/src/main/java/com/github/jsonldjava/core/NormalizeUtils.java

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,85 @@ private static String encodeHex(final byte[] data) {
479479
return rval;
480480
}
481481

482+
/**
483+
* Helper methods for URDNA2015 algorithm
484+
*/
485+
486+
/**
487+
* Sorts map keys and returns them as a list
488+
*/
489+
public static List<String> sortMapKeys(Map<String, ?> map) {
490+
List<String> keyList = new ArrayList<>(map.keySet());
491+
Collections.sort(keyList);
492+
return keyList;
493+
}
494+
495+
/**
496+
* Sorts a list of maps by their keys
497+
*/
498+
public static List<Map<String, Object>> sortMapList(List<Map<String, Object>> mapList) {
499+
return sortMapList(mapList, true);
500+
}
501+
502+
/**
503+
* Sorts a list of maps by their keys, with optional recursion
504+
*/
505+
public static List<Map<String, Object>> sortMapList(List<Map<String, Object>> mapList, boolean recursion) {
506+
List<Map<String, Object>> sortedMapsList = new ArrayList<>();
507+
for (Map<String, Object> map : mapList) {
508+
Map<String, Object> newMap = new LinkedHashMap<>();
509+
List<String> keyList = new ArrayList<>(map.keySet());
510+
Collections.sort(keyList);
511+
512+
for (String key : keyList) {
513+
newMap.put(key, map.get(key));
514+
}
515+
sortedMapsList.add(newMap);
516+
}
517+
if (recursion) {
518+
return sortMapList(sortedMapsList, false);
519+
}
520+
return sortedMapsList;
521+
}
522+
523+
/**
524+
* SHA-256 hash for a list of N-Quads strings
525+
*/
526+
public static String sha256HashnQuads(List<String> nquads) {
527+
StringBuilder stringToHash = new StringBuilder();
528+
for (String nquad : nquads) {
529+
stringToHash.append(nquad);
530+
}
531+
return sha256Hash(stringToHash.toString().getBytes());
532+
}
533+
534+
/**
535+
* SHA-256 hash for a string
536+
*/
537+
public static String sha256Hash(String string) {
538+
return sha256Hash(string.getBytes());
539+
}
540+
541+
/**
542+
* SHA-256 hash for byte array
543+
*/
544+
public static String sha256Hash(byte[] bytes) {
545+
return encodeHex(sha256Raw(bytes));
546+
}
547+
548+
/**
549+
* SHA-256 raw hash for byte array
550+
*/
551+
public static byte[] sha256Raw(byte[] bytes) {
552+
try {
553+
MessageDigest sha = MessageDigest.getInstance("SHA-256");
554+
sha.update(bytes);
555+
return sha.digest();
556+
} catch (NoSuchAlgorithmException e) {
557+
throw new RuntimeException(e);
558+
}
559+
}
560+
482561
/**
483562
* A helper function that gets the blank node name from an RDF quad node
484563
* (subject or object). If the node is a blank node and its value does not
@@ -498,7 +577,7 @@ private static String getAdjacentBlankNodeName(Map<String, Object> node, String
498577
: null;
499578
}
500579

501-
private static class Permutator {
580+
public static class Permutator {
502581

503582
private final List<String> list;
504583
private boolean done;

0 commit comments

Comments
 (0)