From 2e1135dafb381fde7d508d055cad2e052dcc253d Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 24 Aug 2020 16:15:35 +0200 Subject: [PATCH 01/11] Follow alternate document location Implements https://www.w3.org/TR/json-ld11/#alternate-document-location. Resolves https://github.com/jsonld-java/jsonld-java/issues/289. --- .../github/jsonldjava/utils/JsonUtils.java | 68 +++++++++++++++---- .../core/MinimalSchemaOrgRegressionTest.java | 34 +++------- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index f7e0581b..da210958 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -9,14 +9,17 @@ import java.io.StringWriter; import java.io.Writer; import java.net.HttpURLConnection; +import java.net.MalformedURLException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.net.URL; import java.util.List; import java.util.Map; import org.apache.commons.io.ByteOrderMark; import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.BOMInputStream; +import org.apache.http.Header; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpUriRequest; @@ -344,18 +347,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) // Accept headers as it's likely to be file: or jar: in = url.openStream(); } else { - final HttpUriRequest request = new HttpGet(url.toExternalForm()); - // We prefer application/ld+json, but fallback to - // application/json - // or whatever is available - request.addHeader("Accept", ACCEPT_HEADER); - - response = httpClient.execute(request); - final int status = response.getStatusLine().getStatusCode(); - if (status != 200 && status != 203) { - throw new 
IOException("Can't retrieve " + url + ", status code: " + status); - } - in = response.getEntity().getContent(); + in = getJsonLdViaHttpUri(url, httpClient, response); } return fromInputStream(in); } finally { @@ -371,6 +363,56 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) } } + private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient, + CloseableHttpResponse response) throws IOException { + final HttpUriRequest request = new HttpGet(url.toExternalForm()); + // We prefer application/ld+json, but fallback to application/json + // or whatever is available + request.addHeader("Accept", ACCEPT_HEADER); + response = httpClient.execute(request); + + final int status = response.getStatusLine().getStatusCode(); + if (status != 200 && status != 203) { + throw new IOException("Can't retrieve " + url + ", status code: " + status); + } + // follow alternate document location + // https://www.w3.org/TR/json-ld11/#alternate-document-location + URL alternateLink = alternateLink(url, response); + if (alternateLink != null) { + return getJsonLdViaHttpUri(alternateLink, httpClient, response); + } + return response.getEntity().getContent(); + } + + private static URL alternateLink(URL url, CloseableHttpResponse response) + throws MalformedURLException, IOException { + if (response.getEntity().getContentLength() > 0 + && !response.getEntity().getContentType().getValue().equals("application/ld+json")) { + for (Header header : response.getAllHeaders()) { + if (header.getName().equalsIgnoreCase("link")) { + String alternateLink = ""; + boolean relAlternate = false; + boolean jsonld = false; + for (String value : header.getValue().split(";")) { + if (value.trim().startsWith("<")) { + alternateLink = value.replaceAll("<(.*)>", "$1"); + } + if (value.trim().startsWith("type=\"application/ld+json\"")) { + jsonld = true; + } + if (value.trim().startsWith("rel=\"alternate\"")) { + relAlternate = true; + } + } + if 
(jsonld && relAlternate && !alternateLink.isEmpty()) { + return new URL(url.getProtocol() + "://" + url.getAuthority() + alternateLink); + } + } + } + } + return null; + } + /** * Fallback method directly using the {@link java.net.HttpURLConnection} * class for cases where servers do not interoperate correctly with Apache @@ -384,7 +426,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) * @throws IOException * If there was an IO error during parsing. */ - public static Object fromURLJavaNet(java.net.URL url) throws JsonParseException, IOException { + public static Object fromURLJavaNet(URL url) throws JsonParseException, IOException { final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection(); urlConn.addRequestProperty("Accept", ACCEPT_HEADER); diff --git a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java index f4c1b88d..03479f76 100644 --- a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java +++ b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java @@ -26,6 +26,7 @@ import org.junit.Test; import com.github.jsonldjava.utils.JarCacheStorage; +import com.github.jsonldjava.utils.JsonUtils; public class MinimalSchemaOrgRegressionTest { @@ -59,10 +60,13 @@ private void verifyInputStream(InputStream directStream) throws IOException { output.flush(); } final String outputString = output.toString(); - // System.out.println(outputString); + checkBasicConditions(outputString); + } + + private void checkBasicConditions(final String outputString) { // Test for some basic conditions without including the JSON/JSON-LD // parsing code here - // assertTrue(outputString, outputString.endsWith("}")); + assertTrue(outputString, outputString.endsWith("}")); assertFalse("Output string should not be empty: " + outputString.length(), outputString.isEmpty()); 
assertTrue("Unexpected length: " + outputString.length(), outputString.length() > 100000); @@ -90,30 +94,8 @@ public void testApacheHttpClient() throws Exception { // use system defaults for proxy etc. .useSystemProperties().build(); - try { - final HttpUriRequest request = new HttpGet(url.toExternalForm()); - // We prefer application/ld+json, but fallback to application/json - // or whatever is available - request.addHeader("Accept", ACCEPT_HEADER); - - final CloseableHttpResponse response = httpClient.execute(request); - try { - final int status = response.getStatusLine().getStatusCode(); - if (status != 200 && status != 203) { - throw new IOException("Can't retrieve " + url + ", status code: " + status); - } - final InputStream content = response.getEntity().getContent(); - verifyInputStream(content); - } finally { - if (response != null) { - response.close(); - } - } - } finally { - if (httpClient != null) { - httpClient.close(); - } - } + Object content = JsonUtils.fromURL(url, httpClient); + checkBasicConditions(content.toString()); } } From a2e6f8aed492fd6fb50ca920b683067a98c76c94 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 24 Aug 2020 17:59:45 +0200 Subject: [PATCH 02/11] Remove unused imports Complements the last commit. 
--- .../github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java index 03479f76..fef53b25 100644 --- a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java +++ b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java @@ -12,9 +12,6 @@ import java.nio.charset.StandardCharsets; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.protocol.RequestAcceptEncoding; import org.apache.http.client.protocol.ResponseContentEncoding; import org.apache.http.impl.client.CloseableHttpClient; From 2ef090b451af16baccd1b51ee792c52a46231c9d Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 24 Aug 2020 18:18:16 +0200 Subject: [PATCH 03/11] Remove ignored test The ignored test would test an expected behaviour of java.net.HttpURLConnection, i.e. not to automatically follow redirects when this involves a protocol switch (e.g. from HTTP to HTTPS). See #289. 
--- .../core/MinimalSchemaOrgRegressionTest.java | 84 ++++++------------- 1 file changed, 27 insertions(+), 57 deletions(-) diff --git a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java index fef53b25..4e3c6868 100644 --- a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java +++ b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java @@ -19,7 +19,6 @@ import org.apache.http.impl.client.cache.BasicHttpCacheStorage; import org.apache.http.impl.client.cache.CacheConfig; import org.apache.http.impl.client.cache.CachingHttpClientBuilder; -import org.junit.Ignore; import org.junit.Test; import com.github.jsonldjava.utils.JarCacheStorage; @@ -27,37 +26,34 @@ public class MinimalSchemaOrgRegressionTest { - private static final String ACCEPT_HEADER = "application/ld+json, application/json;q=0.9, application/javascript;q=0.5, text/javascript;q=0.5, text/plain;q=0.2, */*;q=0.1"; - - @Ignore("Java API does not have any way of redirecting automatically from HTTP to HTTPS, which breaks schema.org usage with it") + /** + * Tests getting JSON from schema.org with the HTTP Accept header set to + * {@value com.github.jsonldjava.utils.JsonUtils#ACCEPT_HEADER}? . 
+ */ @Test - public void testHttpURLConnection() throws Exception { + public void testApacheHttpClient() throws Exception { final URL url = new URL("http://schema.org/"); - final boolean followRedirectsSetting = HttpURLConnection.getFollowRedirects(); - try { - HttpURLConnection.setFollowRedirects(true); - final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection(); - urlConn.setInstanceFollowRedirects(true); - urlConn.addRequestProperty("Accept", ACCEPT_HEADER); - - final InputStream directStream = urlConn.getInputStream(); - verifyInputStream(directStream); - } finally { - HttpURLConnection.setFollowRedirects(followRedirectsSetting); - } - } - - private void verifyInputStream(InputStream directStream) throws IOException { - assertNotNull("InputStream was null", directStream); - final StringWriter output = new StringWriter(); - try { - IOUtils.copy(directStream, output, StandardCharsets.UTF_8); - } finally { - directStream.close(); - output.flush(); - } - final String outputString = output.toString(); - checkBasicConditions(outputString); + // Common CacheConfig for both the JarCacheStorage and the underlying + // BasicHttpCacheStorage + final CacheConfig cacheConfig = CacheConfig.custom().setMaxCacheEntries(1000) + .setMaxObjectSize(1024 * 128).build(); + + final CloseableHttpClient httpClient = CachingHttpClientBuilder.create() + // allow caching + .setCacheConfig(cacheConfig) + // Wrap the local JarCacheStorage around a BasicHttpCacheStorage + .setHttpCacheStorage(new JarCacheStorage(null, cacheConfig, + new BasicHttpCacheStorage(cacheConfig))) + // Support compressed data + // http://hc.apache.org/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238 + .addInterceptorFirst(new RequestAcceptEncoding()) + .addInterceptorFirst(new ResponseContentEncoding()) + .setRedirectStrategy(DefaultRedirectStrategy.INSTANCE) + // use system defaults for proxy etc. 
+ .useSystemProperties().build(); + + Object content = JsonUtils.fromURL(url, httpClient); + checkBasicConditions(content.toString()); } private void checkBasicConditions(final String outputString) { @@ -68,31 +64,5 @@ private void checkBasicConditions(final String outputString) { outputString.isEmpty()); assertTrue("Unexpected length: " + outputString.length(), outputString.length() > 100000); } - - @Test - public void testApacheHttpClient() throws Exception { - final URL url = new URL("http://schema.org/"); - // Common CacheConfig for both the JarCacheStorage and the underlying - // BasicHttpCacheStorage - final CacheConfig cacheConfig = CacheConfig.custom().setMaxCacheEntries(1000) - .setMaxObjectSize(1024 * 128).build(); - - final CloseableHttpClient httpClient = CachingHttpClientBuilder.create() - // allow caching - .setCacheConfig(cacheConfig) - // Wrap the local JarCacheStorage around a BasicHttpCacheStorage - .setHttpCacheStorage(new JarCacheStorage(null, cacheConfig, - new BasicHttpCacheStorage(cacheConfig))) - // Support compressed data - // http://hc.apache.org/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238 - .addInterceptorFirst(new RequestAcceptEncoding()) - .addInterceptorFirst(new ResponseContentEncoding()) - .setRedirectStrategy(DefaultRedirectStrategy.INSTANCE) - // use system defaults for proxy etc. 
- .useSystemProperties().build(); - - Object content = JsonUtils.fromURL(url, httpClient); - checkBasicConditions(content.toString()); - } - + } From a2f3c9f7b15e26b03bc4c3137555eb5b5efd3d07 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 25 Aug 2020 10:50:42 +0200 Subject: [PATCH 04/11] Reorganize imports - remove unused imports - sorted alphabetically --- .../github/jsonldjava/utils/JsonUtils.java | 24 +++++++++---------- .../core/MinimalSchemaOrgRegressionTest.java | 13 +++------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index da210958..9e1ec875 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -10,12 +10,23 @@ import java.io.Writer; import java.net.HttpURLConnection; import java.net.MalformedURLException; +import java.net.URL; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.net.URL; import java.util.List; import java.util.Map; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.jsonldjava.core.DocumentLoader; +import com.github.jsonldjava.core.JsonLdApi; +import com.github.jsonldjava.core.JsonLdProcessor; + import org.apache.commons.io.ByteOrderMark; import org.apache.commons.io.IOUtils; import org.apache.commons.io.input.BOMInputStream; @@ -32,17 +43,6 @@ import org.apache.http.impl.client.cache.CacheConfig; import org.apache.http.impl.client.cache.CachingHttpClientBuilder; -import com.fasterxml.jackson.core.JsonFactory; -import 
com.fasterxml.jackson.core.JsonGenerationException; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonToken; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.jsonldjava.core.DocumentLoader; -import com.github.jsonldjava.core.JsonLdApi; -import com.github.jsonldjava.core.JsonLdProcessor; - /** * Functions used to make loading, parsing, and serializing JSON easy using * Jackson. diff --git a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java index 4e3c6868..4694f897 100644 --- a/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java +++ b/core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java @@ -1,17 +1,13 @@ package com.github.jsonldjava.core; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.net.HttpURLConnection; import java.net.URL; -import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; +import com.github.jsonldjava.utils.JarCacheStorage; +import com.github.jsonldjava.utils.JsonUtils; + import org.apache.http.client.protocol.RequestAcceptEncoding; import org.apache.http.client.protocol.ResponseContentEncoding; import org.apache.http.impl.client.CloseableHttpClient; @@ -21,9 +17,6 @@ import org.apache.http.impl.client.cache.CachingHttpClientBuilder; import org.junit.Test; -import com.github.jsonldjava.utils.JarCacheStorage; -import com.github.jsonldjava.utils.JsonUtils; - public class MinimalSchemaOrgRegressionTest { /** From 776e77dc63612ef294b814e3ad5d25fddf6e8e1e Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 25 
Aug 2020 14:37:27 +0200 Subject: [PATCH 05/11] Try-with on response in the submethod As proposed in https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475983912 --- .../github/jsonldjava/utils/JsonUtils.java | 57 +++++++------------ 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 9e1ec875..7ee7b85d 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -338,50 +338,35 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) final String protocol = url.getProtocol(); // We can only use the Apache HTTPClient for HTTP/HTTPS, so use the // native java client for the others - CloseableHttpResponse response = null; - InputStream in = null; - try { - if (!protocol.equalsIgnoreCase("http") && !protocol.equalsIgnoreCase("https")) { - // Can't use the HTTP client for those! - // Fallback to Java's built-in JsonLdUrl handler. No need for - // Accept headers as it's likely to be file: or jar: - in = url.openStream(); - } else { - in = getJsonLdViaHttpUri(url, httpClient, response); - } - return fromInputStream(in); - } finally { - try { - if (in != null) { - in.close(); - } - } finally { - if (response != null) { - response.close(); - } - } + if (!protocol.equalsIgnoreCase("http") && !protocol.equalsIgnoreCase("https")) { + // Can't use the HTTP client for those! + // Fallback to Java's built-in JsonLdUrl handler. 
No need for + // Accept headers as it's likely to be file: or jar: + return fromInputStream(url.openStream()); + } else { + return fromInputStream(getJsonLdViaHttpUri(url, httpClient)); } } - private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient, - CloseableHttpResponse response) throws IOException { + private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient) + throws IOException { final HttpUriRequest request = new HttpGet(url.toExternalForm()); // We prefer application/ld+json, but fallback to application/json // or whatever is available request.addHeader("Accept", ACCEPT_HEADER); - response = httpClient.execute(request); - - final int status = response.getStatusLine().getStatusCode(); - if (status != 200 && status != 203) { - throw new IOException("Can't retrieve " + url + ", status code: " + status); - } - // follow alternate document location - // https://www.w3.org/TR/json-ld11/#alternate-document-location - URL alternateLink = alternateLink(url, response); - if (alternateLink != null) { - return getJsonLdViaHttpUri(alternateLink, httpClient, response); + try (CloseableHttpResponse response = httpClient.execute(request)) { + final int status = response.getStatusLine().getStatusCode(); + if (status != 200 && status != 203) { + throw new IOException("Can't retrieve " + url + ", status code: " + status); + } + // follow alternate document location + // https://www.w3.org/TR/json-ld11/#alternate-document-location + URL alternateLink = alternateLink(url, response); + if (alternateLink != null) { + return getJsonLdViaHttpUri(alternateLink, httpClient); + } + return response.getEntity().getContent(); } - return response.getEntity().getContent(); } private static URL alternateLink(URL url, CloseableHttpResponse response) From 818b118d23180385ad9ab667c8e1a022984e9fa5 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 27 Aug 2020 14:24:02 +0200 Subject: [PATCH 06/11] Avoid 
reading from closed stream --- .../main/java/com/github/jsonldjava/utils/JsonUtils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 7ee7b85d..89e2d0cb 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -344,11 +344,11 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) // Accept headers as it's likely to be file: or jar: return fromInputStream(url.openStream()); } else { - return fromInputStream(getJsonLdViaHttpUri(url, httpClient)); + return fromJsonLdViaHttpUri(url, httpClient); } } - private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient) + private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient) throws IOException { final HttpUriRequest request = new HttpGet(url.toExternalForm()); // We prefer application/ld+json, but fallback to application/json @@ -363,9 +363,9 @@ private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHtt // https://www.w3.org/TR/json-ld11/#alternate-document-location URL alternateLink = alternateLink(url, response); if (alternateLink != null) { - return getJsonLdViaHttpUri(alternateLink, httpClient); + return fromJsonLdViaHttpUri(alternateLink, httpClient); } - return response.getEntity().getContent(); + return fromInputStream(response.getEntity().getContent()); } } From 1d675d7e1e4ccde0545b336d98342439bc73da4e Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 28 Aug 2020 11:44:06 +0200 Subject: [PATCH 07/11] Abort if too many alternate links are followed. This avoids a possible endless loop. See https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475985782. 
--- .../com/github/jsonldjava/utils/JsonUtils.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 89e2d0cb..5d4594e7 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -42,6 +42,8 @@ import org.apache.http.impl.client.cache.BasicHttpCacheStorage; import org.apache.http.impl.client.cache.CacheConfig; import org.apache.http.impl.client.cache.CachingHttpClientBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Functions used to make loading, parsing, and serializing JSON easy using @@ -69,6 +71,10 @@ public class JsonUtils { private static final JsonFactory JSON_FACTORY = new JsonFactory(JSON_MAPPER); private static volatile CloseableHttpClient DEFAULT_HTTP_CLIENT; + // Avoid possible endless loop when following alternate locations + private static final int MAX_LINKS_FOLLOW = 20; + private static final Logger log = LoggerFactory.getLogger(JsonUtils.class); + static { // Disable default Jackson behaviour to close @@ -344,11 +350,11 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient) // Accept headers as it's likely to be file: or jar: return fromInputStream(url.openStream()); } else { - return fromJsonLdViaHttpUri(url, httpClient); + return fromJsonLdViaHttpUri(url, httpClient, 0); } } - private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient) + private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient, int linksFollowed) throws IOException { final HttpUriRequest request = new HttpGet(url.toExternalForm()); // We prefer application/ld+json, but fallback to application/json @@ -363,7 +369,13 @@ private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpCli // 
https://www.w3.org/TR/json-ld11/#alternate-document-location URL alternateLink = alternateLink(url, response); if (alternateLink != null) { - return fromJsonLdViaHttpUri(alternateLink, httpClient); + linksFollowed++; + if (linksFollowed > MAX_LINKS_FOLLOW) { + log.warn("Too many alternate links followed. This may indicate a cycle. Aborting."); + return null; + } + return linksFollowed > MAX_LINKS_FOLLOW ? null + : fromJsonLdViaHttpUri(alternateLink, httpClient, linksFollowed); } return fromInputStream(response.getEntity().getContent()); } From 1229f7353bd1f996aa1f0a6e3e6536f16cb4615b Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 28 Aug 2020 11:48:27 +0200 Subject: [PATCH 08/11] Fix missing trim(); simplify value parsing See https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475986626 and https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475987519. --- .../main/java/com/github/jsonldjava/utils/JsonUtils.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 5d4594e7..78aebb81 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -391,13 +391,14 @@ private static URL alternateLink(URL url, CloseableHttpResponse response) boolean relAlternate = false; boolean jsonld = false; for (String value : header.getValue().split(";")) { - if (value.trim().startsWith("<")) { - alternateLink = value.replaceAll("<(.*)>", "$1"); + value=value.trim(); + if (value.startsWith("<") && value.endsWith(">")) { + alternateLink = value.substring(1, value.length() - 1); } - if (value.trim().startsWith("type=\"application/ld+json\"")) { + if (value.startsWith("type=\"application/ld+json\"")) { jsonld = true; } - if (value.trim().startsWith("rel=\"alternate\"")) { + if (value.startsWith("rel=\"alternate\"")) { 
relAlternate = true; } } From faddb48a65b53e7541359ab0a1a6840e125c4239 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 28 Aug 2020 13:49:34 +0200 Subject: [PATCH 09/11] Fix testing if object not null It should be checked if an Entity has a contentType, not if an Entity has content. See https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475989013 and https://github.com/jsonld-java/jsonld-java/pull/292#discussion_r475989860. --- core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 78aebb81..5fcecb24 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -383,7 +383,7 @@ private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpCli private static URL alternateLink(URL url, CloseableHttpResponse response) throws MalformedURLException, IOException { - if (response.getEntity().getContentLength() > 0 + if (response.getEntity().getContentType() != null && !response.getEntity().getContentType().getValue().equals("application/ld+json")) { for (Header header : response.getAllHeaders()) { if (header.getName().equalsIgnoreCase("link")) { From 9100b1355770734b264edf727d9cb77c1d64dd61 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 8 Sep 2020 16:21:50 +0200 Subject: [PATCH 10/11] Throw IOException instead of returning null - remove superfluous check (was always false) - remove superfluous IOException - remove logger as it is no more needed See https://github.com/jsonld-java/jsonld-java/pull/292#commitcomment-42055866. 
--- .../java/com/github/jsonldjava/utils/JsonUtils.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index 5fcecb24..f66cd5fd 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -73,8 +73,6 @@ public class JsonUtils { private static volatile CloseableHttpClient DEFAULT_HTTP_CLIENT; // Avoid possible endless loop when following alternate locations private static final int MAX_LINKS_FOLLOW = 20; - private static final Logger log = LoggerFactory.getLogger(JsonUtils.class); - static { // Disable default Jackson behaviour to close @@ -371,18 +369,16 @@ private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpCli if (alternateLink != null) { linksFollowed++; if (linksFollowed > MAX_LINKS_FOLLOW) { - log.warn("Too many alternate links followed. This may indicate a cycle. Aborting."); - return null; + throw new IOException("Too many alternate links followed. This may indicate a cycle. Aborting."); } - return linksFollowed > MAX_LINKS_FOLLOW ? 
null - : fromJsonLdViaHttpUri(alternateLink, httpClient, linksFollowed); + return fromJsonLdViaHttpUri(alternateLink, httpClient, linksFollowed); } return fromInputStream(response.getEntity().getContent()); } } private static URL alternateLink(URL url, CloseableHttpResponse response) - throws MalformedURLException, IOException { + throws MalformedURLException { if (response.getEntity().getContentType() != null && !response.getEntity().getContentType().getValue().equals("application/ld+json")) { for (Header header : response.getAllHeaders()) { From d5e27615b27efb2f69c5d41b18df8523a503a514 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 8 Sep 2020 16:55:33 +0200 Subject: [PATCH 11/11] Close InputStream within finally block See https://github.com/jsonld-java/jsonld-java/pull/292#commitcomment-42055925. --- core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java index f66cd5fd..c24c8467 100644 --- a/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java +++ b/core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java @@ -116,6 +116,10 @@ public static Object fromInputStream(InputStream input) throws IOException { } } return fromInputStream(bOMInputStream, charset); + } finally { + if (input != null) { + input.close(); + } } }