Skip to content

Commit b03a94f

Browse files
committed
Add device width option, improving others.
1 parent 255df9c commit b03a94f

3 files changed

Lines changed: 188 additions & 109 deletions

File tree

cli.tsx

Lines changed: 3 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,102 +1,22 @@
11
#!/usr/bin/env node
22
import React, { useEffect, useState } from "react";
33
import { render, Box, Text, useApp } from "ink";
4-
import yargs from "yargs";
5-
import { hideBin } from "yargs/helpers";
64
import {
75
Crawler,
86
CrawlConfig,
97
CrawlProgress,
108
CrawlSummary,
119
} from "./crawler.js";
1210

11+
import { argsToConfig } from "./config.js";
12+
1313
interface State {
1414
progress: CrawlProgress;
1515
summary?: CrawlSummary;
1616
startTime: number;
1717
}
1818

19-
// --no- is reserved for negation
20-
const argv = yargs(hideBin(process.argv))
21-
.scriptName("crawler")
22-
.option("url", {
23-
type: "string",
24-
demandOption: true,
25-
desc: "Root URL to crawl",
26-
})
27-
.option("out-dir", {
28-
type: "string",
29-
default: "./out",
30-
desc: "Output directory",
31-
})
32-
.option("browser", {
33-
type: "string",
34-
default: "../chrome/linux-141.0.7378.3/chrome-linux64/chrome",
35-
desc: "Chromium/Chrome executable path",
36-
})
37-
.option("headless", {
38-
type: "boolean",
39-
default: true,
40-
desc: "Run browser with headful mode",
41-
})
42-
.option("breakpoints", {
43-
type: "string",
44-
default: "",
45-
desc: "Comma-separated breakpoint widths",
46-
})
47-
.option("device-scale-factor", {
48-
type: "number",
49-
default: 1,
50-
desc: "Device scale factor",
51-
})
52-
.option("screen-height", {
53-
type: "number",
54-
default: 800,
55-
desc: "Viewport height for screens",
56-
})
57-
.option("recursive", {
58-
type: "boolean",
59-
default: false,
60-
desc: "Recursively crawl same-site links",
61-
})
62-
.option("browser-scan", {
63-
type: "boolean",
64-
default: false,
65-
desc: "Use browser for link extraction (slower, handling JS)",
66-
})
67-
.option("max-pages", {
68-
type: "number",
69-
default: 0,
70-
desc: "Maximum number of pages to crawl (0 = no limit)",
71-
})
72-
.option("delay-after-nav", {
73-
type: "number",
74-
default: 1000,
75-
desc: "Delay ms after navigation before processing",
76-
})
77-
.help()
78-
.parseSync();
79-
80-
const breakpoints = argv.breakpoints
81-
? argv.breakpoints
82-
.split(",")
83-
.map((s) => parseInt(s.trim()))
84-
.filter((n) => !isNaN(n))
85-
: [];
86-
87-
const config: CrawlConfig = {
88-
url: argv.url,
89-
outDir: argv["out-dir"],
90-
browserPath: argv.browser,
91-
headless: argv["headless"],
92-
breakpoints,
93-
screenHeight: argv["screen-height"],
94-
deviceScaleFactor: argv["device-scale-factor"],
95-
recursive: argv["recursive"],
96-
browserScan: argv["browser-scan"],
97-
maxPages: argv["max-pages"],
98-
delayAfterNavigateMs: argv["delay-after-nav"],
99-
};
19+
const config = argsToConfig();
10020

10121
const Dashboard: React.FC<{ cfg: CrawlConfig }> = ({ cfg }) => {
10222
const { exit } = useApp();

config.ts

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
import yargs from "yargs";
2+
import { hideBin } from "yargs/helpers";
3+
import path from "path";
4+
import fs from "fs";
5+
6+
import { CrawlConfig } from "./crawler.js";
7+
8+
const defaultBrowserCmds = ["chrome", "chromium", "google-chrome"];
9+
10+
/**
 * Reports whether `filePath` names a regular file that the current process
 * is allowed to execute.
 *
 * Directories are rejected explicitly: on POSIX systems the execute bit on a
 * directory means "searchable", so an `X_OK` access check alone would report
 * a directory as executable.
 *
 * @param filePath Path to test; symlinks are followed by `statSync`.
 * @returns `true` for an executable regular file, `false` otherwise
 *   (including when the path does not exist or cannot be inspected).
 */
function isExecutable(filePath: string): boolean {
  try {
    if (!fs.statSync(filePath).isFile()) {
      return false;
    }
    fs.accessSync(filePath, fs.constants.X_OK);
    return true;
  } catch {
    // Missing path, permission problem, or failed access check.
    return false;
  }
}
18+
19+
/**
 * Checks whether a command name resolves to an executable in any directory
 * listed in the `PATH` environment variable.
 *
 * On Windows both the bare name and the name with an `.exe` suffix are tried;
 * on other platforms only the bare name is tried.
 *
 * @param cmd Command name to look up (for example `"chromium"`).
 * @returns `true` as soon as one candidate exists and passes `isExecutable`,
 *   `false` when `PATH` is unset or no candidate matches.
 */
function isInPath(cmd: string): boolean {
  const suffixes = process.platform === "win32" ? ["", ".exe"] : [""];
  const searchDirs = process.env.PATH?.split(path.delimiter) ?? [];
  return searchDirs.some((searchDir) =>
    suffixes.some((suffix) => {
      const candidate = path.join(searchDir, cmd + suffix);
      return fs.existsSync(candidate) && isExecutable(candidate);
    }),
  );
}
32+
33+
/**
 * Validates the device widths against the breakpoint widths and terminates
 * the process with exit code 1 (after printing an error) when they are
 * inconsistent.
 *
 * Rules enforced, in order:
 * 1. There must be exactly one more device width than breakpoints.
 * 2. Every device width must be a positive number.
 * 3. The lists must strictly interleave:
 *    `deviceWidths[i] < breakpoints[i] < deviceWidths[i + 1]`.
 *
 * @param breakpoints Breakpoint widths.
 * @param deviceWidths Device widths.
 */
function validateWidths(breakpoints: number[], deviceWidths: number[]) {
  if (deviceWidths.length !== breakpoints.length + 1) {
    console.error(
      `Error: device-widths must have exactly one more entry than breakpoint-widths.\n` +
        `Got ${deviceWidths.length} device widths and ${breakpoints.length} breakpoints.`,
    );
    process.exit(1);
  }

  // Check positivity over the device widths themselves (not over the
  // breakpoints), so the check still runs when there are no breakpoints and
  // also covers the final device width.
  for (let i = 0; i < deviceWidths.length; i++) {
    if (deviceWidths[i] <= 0) {
      console.error(
        `Error: deviceWidths must be positive numbers.\n` +
          `Got deviceWidths[${i}]=${deviceWidths[i]}.`,
      );
      process.exit(1);
    }
  }

  for (let i = 0; i < breakpoints.length; i++) {
    const interleaved =
      deviceWidths[i] < breakpoints[i] && breakpoints[i] < deviceWidths[i + 1];
    if (!interleaved) {
      console.error(
        `Error: deviceWidths and breakpoints must interleave: deviceWidths[${i}] < breakpoints[${i}] < deviceWidths[${i + 1}].\n` +
          `Got deviceWidths[${i}]=${deviceWidths[i]}, breakpoints[${i}]=${breakpoints[i]}, deviceWidths[${i + 1}]=${deviceWidths[i + 1]}.`,
      );
      process.exit(1);
    }
  }
}
62+
63+
/**
 * Converts a comma-separated string into a list of integers.
 *
 * Each piece is trimmed and passed to `parseInt`; pieces that do not parse
 * to a number are dropped. An empty input string yields an empty list.
 *
 * @param arg Comma-separated values, e.g. `"320, 768,1024"`.
 * @returns The successfully parsed integers, in input order.
 */
function toNumList(arg: string): number[] {
  if (!arg) {
    return [];
  }
  const values: number[] = [];
  for (const piece of arg.split(",")) {
    const parsed = parseInt(piece.trim());
    if (!isNaN(parsed)) {
      values.push(parsed);
    }
  }
  return values;
}
71+
72+
/**
 * Builds the crawler configuration from the process command line.
 *
 * Parses `process.argv` with yargs, converts the comma-separated width
 * options into number lists, validates them with `validateWidths`, and
 * resolves the browser executable: the `--browser-path` value when given,
 * otherwise the first entry of `defaultBrowserCmds` found via `isInPath`.
 *
 * The process exits with code 1 when the widths are invalid or when no
 * browser executable can be found.
 *
 * @returns The configuration consumed by the crawler.
 */
export function argsToConfig(): CrawlConfig {
  // --no- is reserved for negation, which is presumably why headful mode is
  // exposed as --disable-headless rather than a --no-* flag.
  const argv = yargs(hideBin(process.argv))
    .scriptName("crawler")
    .option("url", {
      type: "string",
      demandOption: true,
      desc: "Page URL to crawl",
    })
    .option("out-dir", {
      alias: "o",
      type: "string",
      default: "./out",
      desc: "Output directory",
    })
    .option("browser-path", {
      alias: "b",
      type: "string",
      desc: `Chromium/Chrome executable path, default will try ${defaultBrowserCmds.join(", ")}`,
    })
    .option("disable-headless", {
      type: "boolean",
      default: false,
      desc: "Run browser with headful mode",
    })
    .option("device-widths", {
      type: "string",
      default: "1024",
      desc: "Comma-separated device widths for crawling",
    })
    .option("breakpoint-widths", {
      type: "string",
      default: "",
      desc: "Comma-separated widths for @media condition, must interleave device-widths",
    })
    .option("device-scale-factor", {
      type: "number",
      default: 1,
      desc: "Device scale factor",
    })
    .option("screen-height", {
      type: "number",
      default: 800,
      desc: "Viewport height for screens",
    })
    .option("recursive", {
      alias: "r",
      type: "boolean",
      default: false,
      desc: "Recursively crawl same-site links",
    })
    .option("browser-scan", {
      type: "boolean",
      default: false,
      desc: "Use browser for link extraction (slower, handling JS)",
    })
    .option("max-pages", {
      type: "number",
      default: 0,
      desc: "Maximum number of pages to crawl (0 = no limit)",
    })
    .option("delay-after-nav", {
      type: "number",
      default: 1000,
      desc: "Delay ms after navigation before processing",
    })
    .help()
    .parseSync();

  const breakpoints = toNumList(argv["breakpoint-widths"]);
  const deviceWidths = toNumList(argv["device-widths"]);

  // Exits the process when the two lists are inconsistent.
  validateWidths(breakpoints, deviceWidths);

  let browserPath: string | undefined = argv["browser-path"];
  if (!browserPath) {
    // No explicit path: fall back to the first well-known command on PATH.
    browserPath = defaultBrowserCmds.find((cmd) => isInPath(cmd));
    if (!browserPath) {
      console.error(`Error: Chrome/Chromium executable not found in PATH.
Please specify the path with --browser-path.
Tried: ${defaultBrowserCmds.join(", ")}`);
      process.exit(1);
    }
    console.log(`Using browser executable: ${browserPath}`);
  }

  return {
    url: argv.url,
    outDir: argv["out-dir"],
    browserPath: browserPath,
    // The CLI flag is the inverse of the config field.
    headless: !argv["disable-headless"],
    screenHeight: argv["screen-height"],
    breakpoints,
    deviceWidths,
    deviceScaleFactor: argv["device-scale-factor"],
    recursive: argv["recursive"],
    browserScan: argv["browser-scan"],
    maxPages: argv["max-pages"],
    delayAfterNavigateMs: argv["delay-after-nav"],
  };
}

crawler.ts

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ export interface CrawlConfig {
3535
outDir: string;
3636
browserPath: string;
3737
headless: boolean;
38-
breakpoints: number[];
3938
screenHeight: number;
39+
breakpoints: number[];
40+
deviceWidths: number[];
4041
deviceScaleFactor: number;
4142
recursive: boolean;
4243
browserScan: boolean;
@@ -480,32 +481,13 @@ export class Crawler extends EventEmitter {
480481
}
481482

482483
private buildScreens() {
483-
const breakpoints = this.cfg.breakpoints;
484-
const screens: { width: number; height: number; mobile: boolean }[] = [];
485-
if (breakpoints.length === 0) {
486-
screens.push({
487-
width: 1280,
488-
height: this.cfg.screenHeight,
489-
mobile: false,
490-
});
491-
} else {
492-
for (let i = 0; i < breakpoints.length; i++) {
493-
let width: number;
494-
if (i === 0) width = Math.round(breakpoints[i] / 2);
495-
else width = Math.round((breakpoints[i] + breakpoints[i + 1]) / 2);
496-
screens.push({
497-
width,
498-
height: this.cfg.screenHeight,
499-
mobile: false,
500-
});
501-
}
502-
screens.push({
503-
width: breakpoints[breakpoints.length - 1] + 100,
484+
return this.cfg.deviceWidths.map((width, i) => {
485+
return {
486+
width,
504487
height: this.cfg.screenHeight,
505-
mobile: false,
506-
});
507-
}
508-
return screens;
488+
mobile: false, // TODO: option? but seems not matter if no touch event
489+
};
490+
});
509491
}
510492

511493
private async traverse(

0 commit comments

Comments
 (0)