-
Notifications
You must be signed in to change notification settings - Fork 7.7k
Expand file tree
/
Copy pathstep_2_parse_projects.java
More file actions
165 lines (151 loc) · 5 KB
/
step_2_parse_projects.java
File metadata and controls
165 lines (151 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import java.io.IOException;
import java.nio.file.*;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Entry point for step 2: reads the markdown input, extracts the project entries
 * and writes them to an intermediate file.
 *
 * Usage: java step_2_parse_projects.java [input_file] [output_file]
 *
 * When {@code input_file} is omitted, {@code Constants.CONTRIBUTE_README_FILE} is used.
 * When {@code output_file} is omitted, {@code Constants.PARSED_PROJECTS_FILE} is resolved
 * against the directory returned by {@code FileUtils.ensureTmpDirectory()}.
 *
 * @param args optional input path followed by optional output path
 * @throws IOException declared for the file helpers this method delegates to
 */
void main(String[] args) throws IOException {
    Path source;
    if (args.length > 0) {
        source = Path.of(args[0]);
    } else {
        source = Path.of(Constants.CONTRIBUTE_README_FILE);
    }

    // The tmp directory helper is invoked unconditionally, even when an explicit
    // output path is supplied on the command line.
    var tmpDir = FileUtils.ensureTmpDirectory();
    Path target;
    if (args.length > 1) {
        target = Path.of(args[1]);
    } else {
        target = tmpDir.resolve(Constants.PARSED_PROJECTS_FILE);
    }

    System.out.println("Step 2: Parsing project entries...");
    System.out.printf("Input: %s%n", source.toAbsolutePath());
    System.out.printf("Output: %s%n", target.toAbsolutePath());

    FileUtils.validateInputFile(source);
    var markdown = FileUtils.readFileContent(source);
    var parsed = parseProjectEntries(markdown);
    writeParsedEntries(target, parsed);

    System.out.printf(
        "SUCCESS: Successfully parsed %d project entries!%n",
        parsed.size()
    );

    // Tally the entries flagged as GitHub repositories for the summary line.
    long gitHubRepos = 0;
    for (var entry : parsed) {
        if (entry.isGitHubRepo()) {
            gitHubRepos++;
        }
    }
    System.out.printf(
        "GitHub repos: %d%n",
        gitHubRepos
    );
}
/**
 * Walks the markdown line by line and collects every project entry found inside
 * the Projects section.
 *
 * The scan starts collecting after a line beginning with
 * {@code Constants.PROJECTS_SECTION} and stops at the next line that begins with
 * {@code Constants.SECTION} without also beginning with {@code Constants.SUBSECTION}.
 * Each entry is labelled with the most recent level 3 heading, followed by
 * {@code "/"} and the most recent level 4 heading when one is active.
 *
 * @param content full markdown text
 * @return the parsed entries, in document order
 */
List<ProjectEntry> parseProjectEntries(String content) {
    String[] rows = content.lines().toArray(String[]::new);
    var collected = new ArrayList<ProjectEntry>();

    boolean insideProjects = false;
    String level3Heading = "";
    String level4Heading = "";

    int cursor = 0;
    while (cursor < rows.length) {
        String row = rows[cursor];
        // Number of lines this iteration consumes; only a parsed entry can make it differ from 1.
        int step = 1;

        if (row.startsWith(Constants.PROJECTS_SECTION)) {
            // Entering the Projects section
            insideProjects = true;
        } else {
            boolean startsLevel3 = row.startsWith(Constants.SUBSECTION);

            // A section heading that is neither a subsection nor the Projects heading ends the section
            if (insideProjects
                && row.startsWith(Constants.SECTION)
                && !(startsLevel3 || row.equals(Constants.PROJECTS_SECTION))
            ) {
                insideProjects = false;
            }

            if (insideProjects) {
                boolean underscoreWrapped = row.startsWith("_") && row.endsWith("_");

                if (startsLevel3) {
                    level3Heading = row.substring(Constants.SUBSECTION.length());
                    level4Heading = ""; // a new level 3 heading clears the level 4 heading
                } else if (row.startsWith("####")) {
                    level4Heading = row.substring(4).trim(); // drop the leading "####"
                } else if (!underscoreWrapped
                    && !row.isBlank()
                    && row.matches(Constants.ENTRY_PATTERN)
                ) {
                    // List entry: label it with "level3" or "level3/level4"
                    String label = level3Heading;
                    if (!level4Heading.isEmpty()) {
                        label = level3Heading + "/" + level4Heading;
                    }
                    var entry = parseProjectEntry(rows, cursor, label);
                    if (entry != null) {
                        collected.add(entry);
                        // Jump past the continuation lines the entry already absorbed
                        step = entry.linesToSkip();
                    }
                }
            }
        }

        cursor += step;
    }
    return collected;
}
/**
 * Parses the list-style project entry that begins at {@code lines[startIndex]}.
 *
 * Name, URL and the first part of the description come from capture groups 1, 2
 * and 3 of {@code Constants.PROJECT_PATTERN}. The lines that follow are folded into
 * the description, separated by single spaces, until a terminator is reached:
 * a blank line, a line starting with {@code Constants.SECTION},
 * {@code Constants.SUBSECTION} or {@code "####"}, a line matching
 * {@code Constants.ENTRY_PATTERN}, or the end of the input.
 *
 * @param lines      all lines of the document
 * @param startIndex index of the entry's first line
 * @param section    section label to store on the entry
 * @return the parsed entry, whose {@code linesToSkip} is the number of lines the entry
 *         spans (first line plus every continuation line consumed), or {@code null}
 *         when the first line does not match {@code Constants.PROJECT_PATTERN}
 */
ProjectEntry parseProjectEntry(String[] lines, int startIndex, String section) {
    var matcher = Constants.PROJECT_PATTERN.matcher(lines[startIndex]);
    if (!matcher.find()) {
        return null;
    }

    var name = matcher.group(1);
    var url = matcher.group(2);
    var description = new StringBuilder(matcher.group(3));
    var linesToSkip = 1;

    // Handle multi-line descriptions
    for (int i = startIndex + 1; i < lines.length; i++) {
        var nextLine = lines[i];
        if (nextLine.isBlank() ||
            nextLine.startsWith(Constants.SECTION) ||
            nextLine.startsWith(Constants.SUBSECTION) ||
            nextLine.startsWith("####") ||
            nextLine.matches(Constants.ENTRY_PATTERN)
        ) {
            break;
        }

        // Every line the scan steps over belongs to this entry, so it is always counted.
        // Previously a line that was not blank per isBlank() yet empty after trim()
        // (e.g. only control characters) could be passed without being counted, leaving
        // linesToSkip short and making the caller re-scan an already-consumed line.
        linesToSkip++;

        // The former indented / non-indented branches did the same thing; a single
        // check on the trimmed text replaces both and avoids appending a bare space.
        var continuation = nextLine.trim();
        if (!continuation.isEmpty()) {
            description.append(" ").append(continuation);
        }
    }

    return new ProjectEntry(
        name,
        url,
        description.toString(),
        linesToSkip,
        section
    );
}
/**
 * Serialises the parsed entries into a simple line-oriented text format and hands
 * the result to {@code FileUtils.writeOutputFile}.
 *
 * Each entry becomes six lines: name, URL, description, lines-to-skip and section,
 * each preceded by its {@code Constants} prefix, followed by
 * {@code Constants.SECTION_SEPARATOR}. Consecutive entries are joined with an extra
 * newline.
 *
 * @param outputPath destination passed through to the file helper
 * @param entries    entries to serialise, written in list order
 * @throws IOException declared for the write helper this method delegates to
 */
void writeParsedEntries(Path outputPath, List<ProjectEntry> entries) throws IOException {
    // One record per entry; the text block ends with a newline after the separator line.
    var recordTemplate = """
        %s%s
        %s%s
        %s%s
        %s%d
        %s%s
        %s
        """;

    var records = new ArrayList<String>(entries.size());
    for (var entry : entries) {
        records.add(String.format(
            recordTemplate,
            Constants.NAME_PREFIX, entry.name(),
            Constants.URL_PREFIX, entry.url(),
            Constants.DESC_PREFIX, entry.description(),
            Constants.SKIP_PREFIX, entry.linesToSkip(),
            Constants.SECTION_PREFIX, entry.section(),
            Constants.SECTION_SEPARATOR
        ));
    }

    FileUtils.writeOutputFile(outputPath, String.join("\n", records));
}