From f9b3a5b5e6253ac8d1e8ecc2b30fa3b0d7ba2ca7 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Tue, 14 Feb 2023 23:25:44 +0100 Subject: [PATCH 1/5] actually extract `.html.erb` files --- .../src/com/semmle/js/extractor/FileExtractor.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 45bd48bf4087..b26895d82e69 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -104,7 +104,7 @@ private static int skipBOM(byte[] bytes, int length) { /** Information about supported file types. */ public static enum FileType { - HTML(".htm", ".html", ".xhtm", ".xhtml", ".vue", ".hbs", ".ejs", ".njk", ".html.erb") { + HTML(".htm", ".html", ".xhtm", ".xhtml", ".vue", ".hbs", ".ejs", ".njk", ".erb") { @Override public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) { return new HTMLExtractor(config, state); @@ -120,6 +120,12 @@ protected boolean contains(File f, String lcExt, ExtractorConfig config) { if (isBinaryFile(f, lcExt, config)) { return false; } + // for ERB files we are only interrested in `.html.erb` files + if (FileUtil.extension(f).equalsIgnoreCase(".erb")) { + if (!f.getName().endsWith(".html.erb")) { + return false; + } + } return super.contains(f, lcExt, config); } }, @@ -350,7 +356,7 @@ public static FileType forFile(File f, ExtractorConfig config) { /** Determine the {@link FileType} for a given file based on its extension only. 
*/ public static FileType forFileExtension(File f) { - String lcExt = StringUtil.lc(FileUtil.extension(f)); + String lcExt = StringUtil.lc(FileUtil.extension(f)); // TODO: Here, it doesn't recognize .html.erb files for (FileType tp : values()) if (tp.getExtensions().contains(lcExt)) { return tp; From 710e79b2d5ac59483d93a1448565484756ffab01 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Tue, 14 Feb 2023 23:25:49 +0100 Subject: [PATCH 2/5] bump extractor version --- javascript/extractor/src/com/semmle/js/extractor/Main.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index 69503631c047..a90711545a5c 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -41,7 +41,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. 
*/ - public static final String EXTRACTOR_VERSION = "2022-11-29"; + public static final String EXTRACTOR_VERSION = "2023-02-15"; public static final Pattern NEWLINE = Pattern.compile("\n"); From 25a846958637cdd5e59dce29baed955875b9d57c Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Wed, 15 Feb 2023 10:28:05 +0100 Subject: [PATCH 3/5] update expected output, now that .html.erb files are actually extracted --- .../extractor/tests/vue/output/trap/rails.html.erb.trap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/extractor/tests/vue/output/trap/rails.html.erb.trap b/javascript/extractor/tests/vue/output/trap/rails.html.erb.trap index 76af8c5c8d3c..2bfa0afc073d 100644 --- a/javascript/extractor/tests/vue/output/trap/rails.html.erb.trap +++ b/javascript/extractor/tests/vue/output/trap/rails.html.erb.trap @@ -1,5 +1,5 @@ -#10000=@"/rails.erb;sourcefile" -files(#10000,"/rails.erb") +#10000=@"/rails.html.erb;sourcefile" +files(#10000,"/rails.html.erb") #10001=@"/;folder" folders(#10001,"/") containerparent(#10001,#10000) From 271cc6b9614eb6bab06825cd095db283c87b88f4 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Fri, 17 Feb 2023 09:50:22 +0100 Subject: [PATCH 4/5] remove leftover debug comment --- .../extractor/src/com/semmle/js/extractor/FileExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index b26895d82e69..5b7317cbb9b0 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -356,7 +356,7 @@ public static FileType forFile(File f, ExtractorConfig config) { /** Determine the {@link FileType} for a given file based on its extension only. 
*/ public static FileType forFileExtension(File f) { - String lcExt = StringUtil.lc(FileUtil.extension(f)); // TODO: Here, it doesn't recognize .html.erb files + String lcExt = StringUtil.lc(FileUtil.extension(f)); for (FileType tp : values()) if (tp.getExtensions().contains(lcExt)) { return tp; From 505168f24bc7ae1ce72eddfa3e8eed31652f5423 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Mon, 27 Feb 2023 17:19:43 +0100 Subject: [PATCH 5/5] fix upper-case .html.erb files --- .../extractor/src/com/semmle/js/extractor/FileExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 5b7317cbb9b0..9c880f7490f4 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -122,7 +122,7 @@ protected boolean contains(File f, String lcExt, ExtractorConfig config) { } // for ERB files we are only interrested in `.html.erb` files if (FileUtil.extension(f).equalsIgnoreCase(".erb")) { - if (!f.getName().endsWith(".html.erb")) { + if (!f.getName().toLowerCase().endsWith(".html.erb")) { return false; } }