-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathurl.js
More file actions
290 lines (255 loc) · 9.93 KB
/
url.js
File metadata and controls
290 lines (255 loc) · 9.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import path from 'path';
import * as mime from 'mime-types';
/**
 * Resolve the base directory under which all pods are stored.
 *
 * This is a function rather than a constant so the environment variable is
 * read on every call instead of once at import time: ES modules are loaded
 * before the CLI sets the env var.
 *
 * @returns {string} `process.env.DATA_ROOT` when it is set and non-empty,
 *   otherwise `'./data'`
 */
export function getDataRoot() {
  const { DATA_ROOT: configuredRoot } = process.env;
  if (configuredRoot) {
    return configuredRoot;
  }
  return './data';
}
/**
 * Legacy snapshot of the data root, kept for compatibility only.
 * Holds `'./data'` until updateDataRoot() runs; callers should prefer
 * getDataRoot(), which always reflects the current environment.
 */
export let DATA_ROOT = './data';

/**
 * Refresh the legacy DATA_ROOT binding with the current getDataRoot() value.
 * Intended to be called from storage init, once the env var has been set.
 */
export function updateDataRoot() {
  const currentRoot = getDataRoot();
  DATA_ROOT = currentRoot;
}
/**
 * Convert a URL path to an absolute filesystem path inside the data root.
 *
 * Steps: strip leading slashes, percent-decode, delete every `..` sequence,
 * strip leading slashes again, resolve against the data root, and finally
 * verify that the result did not leave the data root.
 *
 * Note that `..` is removed wherever it occurs, so a name such as
 * `a..b` maps to `ab`.
 *
 * @param {string} urlPath - The URL path (e.g., /alice/profile/)
 * @returns {string} - Absolute filesystem path (no trailing separator)
 * @throws {URIError} - If urlPath contains a malformed percent-escape
 * @throws {Error} - If the resolved path is outside the data root
 */
export function urlToPath(urlPath) {
  // Normalize: strip all leading slashes (#131 — `//foo` from bot probes
  // would otherwise leave `/foo`, and path.resolve(root, '/foo') would
  // treat the second arg as absolute and escape dataRoot).
  let normalized = decodeURIComponent(urlPath.replace(/^\/+/, ''));

  // Security: remove path traversal attempts (multiple passes for ....// bypass)
  let previous;
  do {
    previous = normalized;
    normalized = normalized.replace(/\.\./g, '');
  } while (normalized !== previous);

  // Strip leading slashes once more: decoding (`%2Ffoo`) or removing `..`
  // (`../foo`) can re-introduce a leading `/`. Left in place it would make
  // path.resolve ignore dataRoot entirely — either tripping the guard below
  // for a harmless request, or accepting an absolute path that already
  // happens to lie inside dataRoot.
  normalized = normalized.replace(/^\/+/, '');

  // Resolve to an absolute path and verify it is within the data root.
  const dataRoot = path.resolve(getDataRoot());
  const resolved = path.resolve(dataRoot, normalized);

  // Defense in depth: reject anything path.resolve still placed outside dataRoot.
  const insideRoot = resolved === dataRoot || resolved.startsWith(dataRoot + path.sep);
  if (!insideRoot) {
    throw new Error('Path traversal detected');
  }
  return resolved;
}
/**
 * Convert a URL path to a filesystem path in subdomain mode.
 *
 * In subdomain mode the pod is determined by the hostname, not the path, so
 * the result is `<dataRoot>/<podName>/<urlPath>`. Both inputs have every `..`
 * sequence removed, and the result is verified to stay inside the pod's own
 * directory (and the pod directory inside the data root).
 *
 * @param {string} urlPath - The URL path (e.g., /public/file.txt)
 * @param {string} podName - The pod name from subdomain (e.g., "alice")
 * @returns {string} - Filesystem path (e.g., DATA_ROOT/alice/public/file.txt)
 * @throws {URIError} - If urlPath contains a malformed percent-escape
 * @throws {Error} - If path traversal is detected
 */
export function urlToPathWithPod(urlPath, podName) {
  // Normalize: strip all leading slashes, then percent-decode.
  let normalized = decodeURIComponent(urlPath.replace(/^\/+/, ''));

  // Security: remove path traversal attempts (multiple passes for ....// bypass)
  let previous;
  do {
    previous = normalized;
    normalized = normalized.replace(/\.\./g, '');
  } while (normalized !== previous);

  // Strip leading slashes once more: decoding (`%2Fx`) or removing `..`
  // (`../x`) can re-introduce a leading `/`. An absolute `normalized` makes
  // path.resolve discard both dataRoot and the pod name, so a request such as
  // `/../<dataRoot>/bob/secret` on alice's host would resolve into bob's pod.
  normalized = normalized.replace(/^\/+/, '');

  // Also sanitize podName (multiple passes for ....// bypass)
  let safePodName = podName;
  let previousPod;
  do {
    previousPod = safePodName;
    safePodName = safePodName.replace(/\.\./g, '');
  } while (safePodName !== previousPod);

  // Resolve in two steps so the pod directory itself can be checked.
  const dataRoot = path.resolve(getDataRoot());
  const podRoot = path.resolve(dataRoot, safePodName);
  const resolved = path.resolve(podRoot, normalized);

  const isWithin = (parent, child) =>
    child === parent || child.startsWith(parent + path.sep);

  // The pod directory must live under the data root, and the resource must
  // live under the pod directory — not merely somewhere under the data root.
  if (!isWithin(dataRoot, podRoot) || !isWithin(podRoot, resolved)) {
    throw new Error('Path traversal detected');
  }
  return resolved;
}
/**
 * Map a request to its filesystem path, honoring subdomain mode.
 *
 * The query string (everything from the first `?`) is dropped. When
 * subdomains are enabled and the request carries a pod name, the pod comes
 * from the hostname (urlToPathWithPod); otherwise the default path-based
 * mapping (urlToPath) is used.
 *
 * @param {object} request - Fastify request object
 * @returns {string} - Filesystem path
 */
export function getPathFromRequest(request) {
  const [urlPath] = request.url.split('?');
  const podFromHostname = request.subdomainsEnabled && request.podName;

  return podFromHostname
    ? urlToPathWithPod(urlPath, request.podName)
    : urlToPath(urlPath);
}
/**
 * Compute the URL path a request effectively addresses.
 *
 * The query string is dropped. In subdomain mode with a recognized pod, the
 * pod name is prepended as the first path segment; otherwise the path is
 * returned unchanged.
 *
 * @param {object} request - Fastify request object
 * @returns {string} - URL path with pod prefix if needed
 */
export function getEffectiveUrlPath(request) {
  const [urlPath] = request.url.split('?');
  const needsPodPrefix = request.subdomainsEnabled && request.podName;

  if (!needsPodPrefix) {
    return urlPath;
  }
  return `/${request.podName}${urlPath}`;
}
/**
 * Tell whether a URL path denotes a container, i.e. its last character is `/`.
 * @param {string} urlPath
 * @returns {boolean} true when the path ends with a slash
 */
export function isContainer(urlPath) {
  const lastChar = urlPath[urlPath.length - 1];
  return lastChar === '/';
}
/**
 * Return the path of the container that holds the given resource.
 *
 * One trailing slash is ignored, the last segment is dropped, and the result
 * always ends with `/` (so the parent of `/alice` and of `/` is `/`).
 *
 * @param {string} urlPath
 * @returns {string} parent container path, ending in `/`
 */
export function getParentContainer(urlPath) {
  const trimmed = urlPath.replace(/\/$/, '');
  const lastSlash = trimmed.lastIndexOf('/');
  // No slash left means there is no parent prefix at all.
  const parentPrefix = lastSlash === -1 ? '' : trimmed.slice(0, lastSlash);
  return `${parentPrefix}/`;
}
/**
 * Return the last segment of a URL path (the resource or container name).
 *
 * One trailing slash is ignored, so `/alice/profile/` yields `profile`.
 *
 * @param {string} urlPath
 * @returns {string} final path segment; empty string for `/` or `''`
 */
export function getResourceName(urlPath) {
  const trimmed = urlPath.replace(/\/$/, '');
  // lastIndexOf gives -1 when there is no slash, so +1 starts at index 0.
  const nameStart = trimmed.lastIndexOf('/') + 1;
  return trimmed.slice(nameStart);
}
/**
 * Extract the pod name from a URL path string or a request object.
 *
 * Resolves to one of five shapes, depending on deployment mode:
 *
 * - Subdomain mode, recognized subdomain → `request.podName` (from hostname).
 * - Subdomain mode, no recognized subdomain → `null` (base-domain access;
 *   callers guard with `if (podName)` and skip pod-scoped side effects).
 * - Single-user, root pod (`singleUserName` empty or '/') → `'.'`, so that
 *   `path.join(dataRoot, '.', QUOTA_FILE)` collapses to `<dataRoot>/QUOTA_FILE`.
 * - Single-user, named pod → `singleUserName` (every request belongs to the
 *   one pod regardless of URL, so a segment like `index.html` is never
 *   mistaken for a pod name).
 * - Path-based multi-pod (default, no flags) → first URL segment, or `null`
 *   for requests at `/` that are not inside any pod.
 *
 * Background: before single-user mode was handled here, `PUT /index.html` on
 * a single-user root-pod deployment yielded the pod name `"index.html"`, and
 * the quota sidecar landed at `<dataRoot>/index.html/.quota.json` → `ENOTDIR`
 * (index.html is a file).
 *
 * @param {string|object} pathOrRequest - URL path string or Fastify request object
 * @returns {string|null} - Pod name, `'.'` for root-pod, or `null` when no pod applies
 */
export function getPodName(pathOrRequest) {
  const isRequest = pathOrRequest !== null && typeof pathOrRequest === 'object';

  // String form: path-based pod extraction.
  if (!isRequest) {
    return getPodNameFromPath(pathOrRequest);
  }

  const request = pathOrRequest;

  // Subdomain mode: hostname drives it. Unrecognized host → no pod.
  if (request.subdomainsEnabled) {
    return request.podName || null;
  }

  // Single-user mode: always the one pod, regardless of URL path.
  if (request.singleUser) {
    const configuredName = request.singleUserName;
    const isRootPod = !configuredName || configuredName === '/';
    return isRootPod ? '.' : configuredName;
  }

  // Path-based multi-pod: first URL segment of the path without its query.
  const pathOnly = request.url?.split('?')[0] || '';
  return getPodNameFromPath(pathOnly);
}
/**
 * Extract the pod name from a URL path: its first non-empty segment.
 *
 * Segments starting with a dot (.well-known, .acl, etc.) are system paths,
 * not pods, and yield `null`.
 *
 * @param {string} urlPath - URL path (e.g., /alice/public/file.txt)
 * @returns {string|null} - Pod name, or null when there is no segment or it is a dot-path
 */
function getPodNameFromPath(urlPath) {
  const firstSegment = urlPath.split('/').find((segment) => segment !== '');

  const isPod = firstSegment !== undefined && !firstSegment.startsWith('.');
  return isPod ? firstSegment : null;
}
/**
 * Determine the content type for a file from its name.
 *
 * Resolution order:
 *  1. Solid convention dotfiles named exactly `.acl` / `.meta` → JSON-LD.
 *  2. Solid-specific extension overrides (extension compared lower-cased).
 *  3. The `mime-types` database.
 *  4. `application/octet-stream`.
 *
 * @param {string} filePath
 * @returns {string} MIME type
 */
export function getContentType(filePath) {
  // Solid convention dotfiles (.acl, .meta) are RDF resources. path.extname
  // returns '' for leading-dot names, so an extension lookup cannot see
  // them; match on the basename and tag them as JSON-LD — the format JSS
  // writes them in via serializeAcl() / createPodStructure(). Content
  // negotiation then serves Turtle-native clients through handleGet's
  // conneg branch.
  const fileName = path.basename(filePath);
  if (fileName === '.acl' || fileName === '.meta') {
    return 'application/ld+json';
  }

  // Solid-specific overrides — types the `mime-types` db doesn't know, or
  // where Solid semantics differ.
  const solidOverrides = new Map([
    ['.jsonld', 'application/ld+json'],
    ['.ttl', 'text/turtle'],
    ['.n3', 'text/n3'],
    ['.nt', 'application/n-triples'],
    ['.rdf', 'application/rdf+xml'],
    ['.nq', 'application/n-quads'],
    ['.trig', 'application/trig'],
    ['.m3u', 'audio/mpegurl'],
    ['.pls', 'audio/x-scpls'],
    // Solid ACL/meta as extensions (e.g. publicTypeIndex.jsonld.acl)
    ['.acl', 'application/ld+json'],
    ['.meta', 'application/ld+json'],
  ]);

  const extension = path.extname(filePath).toLowerCase();
  const overridden = solidOverrides.get(extension);
  if (overridden) {
    return overridden;
  }

  // Fall back to the comprehensive mime-types database (as CSS and NSS do),
  // then octet-stream. This is what lets audio/video/etc. resolve to a real
  // type instead of forcing a download. See #533.
  return mime.lookup(filePath) || 'application/octet-stream';
}
/**
 * Check whether a content type is one of the RDF serializations handled here.
 *
 * The comparison is an exact, case-sensitive string match: a value carrying
 * parameters (e.g. `text/turtle; charset=utf-8`) does not match.
 *
 * @param {string} contentType
 * @returns {boolean}
 */
export function isRdfContentType(contentType) {
  switch (contentType) {
    case 'application/ld+json':
    case 'application/json':
    case 'text/turtle':
    case 'text/n3':
    case 'application/n-triples':
    case 'application/rdf+xml':
    case 'application/n-quads':
    case 'application/trig':
      return true;
    default:
      return false;
  }
}
// Security: Maximum JSON size for parsing (10MB)
const MAX_JSON_SIZE = 10 * 1024 * 1024;

/**
 * Safely parse JSON with a size limit to prevent DoS.
 *
 * The limit is enforced in bytes: string input is measured as its UTF-8
 * encoded length (not `String.length`, which counts UTF-16 code units and
 * under-reports non-ASCII text by up to 3x). Any other input is measured by
 * its own `.length`.
 *
 * @param {string} jsonString - The JSON string to parse
 * @param {number} maxSize - Maximum allowed size in bytes (default 10MB)
 * @returns {object} - Parsed JSON value
 * @throws {Error} - If the input is larger than maxSize
 * @throws {SyntaxError} - If the input is not valid JSON
 */
export function safeJsonParse(jsonString, maxSize = MAX_JSON_SIZE) {
  // Buffer is a Node.js global; no import is required.
  const byteSize = typeof jsonString === 'string'
    ? Buffer.byteLength(jsonString, 'utf8')
    : jsonString.length;

  if (byteSize > maxSize) {
    throw new Error(`JSON exceeds maximum size of ${maxSize} bytes`);
  }
  return JSON.parse(jsonString);
}