Skip to content

Commit 1778d58

Browse files
author
Peter Bengtsson
authored
warmup remotejson cache (github#36116)
1 parent be73901 commit 1778d58

File tree

8 files changed

+137
-10
lines changed

8 files changed

+137
-10
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
# Composite action: warm up (or just restore) the getRemoteJSON disk cache.
name: Warmup getRemoteJSON's cache

description: Run the script that prepares the disk-cache for getRemoteJSON

inputs:
  restore-only:
    description: Only attempt to restore, don't warm up
    required: false

runs:
  using: 'composite'
  steps:
    # The caching technique here is to unboundedly add and add to the cache.
    # You "wrap" the step that appends to disk and it will possibly retrieve
    # some from the cache, then save it when it's got more in it.
    - name: Cache .remotejson-cache (restore)
      # You can't use a SHA on these. Only possible with `actions/cache@SHA...`
      uses: actions/cache/restore@v3
      with:
        path: .remotejson-cache
        key: remotejson-cache-
        restore-keys: remotejson-cache-

    # When we use this composite action from the workflows like
    # Azure Preview Deploy and Azure Production Deploy, we don't have
    # any Node installed or any of its packages. I.e. we never
    # run `npm ci` in those actions. For security sake.
    # So we can't do things that require Node code.
    # Tests and others will omit the `restore-only` input, but
    # prepping for Docker build and push, will set it to a non-empty
    # string which basically means "If you can restore it, great.
    # If not, that's fine, don't bother".
    - name: Run script
      if: ${{ inputs.restore-only == '' }}
      shell: bash
      run: node script/warmup-remotejson.js

    - name: Cache .remotejson-cache (save)
      if: ${{ inputs.restore-only == '' }}
      uses: actions/cache/save@v3
      with:
        path: .remotejson-cache
        key: remotejson-cache-${{ github.sha }}

.github/workflows/azure-preview-env-deploy.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ jobs:
175175
rsync -rptovR ./user-code/pages/./**/*.tsx ./pages
176176
rsync -rptovR ./user-code/stylesheets/./**/*.scss ./stylesheets
177177
178+
- uses: ./.github/actions/warmup-remotejson-cache
179+
with:
180+
restore-only: true
181+
178182
# In addition to making the final image smaller, we also save time by not sending unnecessary files to the docker build context
179183
- name: 'Prune for preview env'
180184
run: .github/actions-scripts/prune-for-preview-env.sh

.github/workflows/azure-prod-build-deploy.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ jobs:
7171
- name: Merge docs-early-access repo's folders
7272
run: .github/actions-scripts/merge-early-access.sh
7373

74+
- uses: ./.github/actions/warmup-remotejson-cache
75+
with:
76+
restore-only: true
77+
7478
- uses: ./.github/actions/clone-translations
7579
with:
7680
token: ${{ secrets.DOCUBOT_REPO_PAT }}

.github/workflows/keep-caches-warm.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,5 @@ jobs:
3333

3434
- name: Build
3535
run: npm run build
36+
37+
- uses: ./.github/actions/warmup-remotejson-cache

.github/workflows/test.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,10 @@ jobs:
153153
- name: Run build script
154154
run: npm run build
155155

156-
- name: Disk cache used by getRemoteJSON function in middleware
157-
uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
158-
with:
159-
path: .remotejson-cache
160-
# Very liberal cache key. Note, for this to become populated
161-
# for other branches, you have to manually run this workflow
162-
# at least once using the "Run workflow" button.
163-
key: ${{ runner.os }}-remotejson
156+
- uses: ./.github/actions/warmup-remotejson-cache
157+
# Only the 'routing' tests include end-to-end tests about
158+
# archived enterprise server URLs.
159+
if: ${{ matrix.name == 'routing' }}
164160

165161
- name: Index fixtures into the local Elasticsearch
166162
# For the sake of saving time, only run this step if the group

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ COPY pages ./pages
4949
COPY components ./components
5050
COPY lib ./lib
5151
COPY src ./src
52+
# The star is because it's an optional directory
53+
COPY .remotejson-cache* ./.remotejson-cache
5254
# Certain content is necessary for being able to build
5355
COPY content/index.md ./content/index.md
5456
COPY content/rest ./content/rest
@@ -90,6 +92,7 @@ COPY --chown=node:node assets ./assets
9092
COPY --chown=node:node content ./content
9193
COPY --chown=node:node lib ./lib
9294
COPY --chown=node:node src ./src
95+
COPY --chown=node:node .remotejson-cache* ./.remotejson-cache
9396
COPY --chown=node:node middleware ./middleware
9497
COPY --chown=node:node data ./data
9598
COPY --chown=node:node next.config.js ./

middleware/get-remote-json.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ export default async function getRemoteJSON(url, config) {
4242
const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'
4343

4444
const onDisk = path.join(ROOT, `${tempFilename}.json`)
45-
// Never even try reading from disk in production.
46-
if (!inProd && fs.existsSync(onDisk)) {
45+
46+
try {
4747
const body = fs.readFileSync(onDisk, 'utf-8')
4848
// It might exist on disk, but it could be empty
4949
if (body) {
@@ -58,6 +58,10 @@ export default async function getRemoteJSON(url, config) {
5858
}
5959
}
6060
}
61+
} catch (error) {
62+
if (!(error instanceof SyntaxError || error.code === 'ENOENT')) {
63+
throw error
64+
}
6165
}
6266

6367
if (!foundOnDisk) {

script/warmup-remotejson.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env node
2+
3+
// [start-readme]
4+
//
5+
// This calls a function directly that is used by our archived enterprise
6+
// middleware. Namely, the `getRemoteJSON` function. That function is
7+
// able to use the disk to cache responses quite aggressively. So when
8+
// it's been run once, with the same disk, next time it can draw from disk
9+
// rather than having to rely on network.
10+
//
11+
// We have this script to avoid excessive network fetches in production
12+
// where, due to production deploys restarting new Node services, we
13+
// can't rely on in-memory caching often enough.
14+
//
15+
// The list of URLs hardcoded in here is based on analyzing the URLs that
16+
// were logged as tags in Datadog for entries that couldn't rely on
17+
// in-memory cache.
18+
//
19+
// [end-readme]
20+
21+
import { program } from 'commander'
22+
import semver from 'semver'
23+
24+
import getRemoteJSON from '../middleware/get-remote-json.js'
25+
import {
26+
deprecated,
27+
firstReleaseStoredInBlobStorage,
28+
lastVersionWithoutArchivedRedirectsFile,
29+
} from '../lib/enterprise-server-releases.js'
30+
31+
// CLI surface: no options of its own beyond commander's built-in --help;
// parsing still gives users a self-describing `--help` output.
program.description(
  "Visit a bunch of archived redirects.json URLs to warm up getRemoteJSON's disk cache"
)
program.parse(process.argv)

main()
39+
// Map an enterprise-server release number to the URL of its archived
// redirects.json. Releases at or above `firstReleaseStoredInBlobStorage`
// are served from Azure blob storage; older ones from the GitHub Pages
// archive repo.
function version2url(version) {
  const release = semver.coerce(version).raw
  const blobCutoff = semver.coerce(firstReleaseStoredInBlobStorage).raw
  if (semver.gte(release, blobCutoff)) {
    return `https://githubdocs.azureedge.net/enterprise/${version}/redirects.json`
  }
  return `https://github.github.com/help-docs-archived-enterprise-versions/${version}/redirects.json`
}
48+
49+
// True when `version` is exactly `lastVersionWithoutArchivedRedirectsFile`.
// NOTE(review): despite the name, this matches the last release *without*
// an archived redirects.json — main() uses it as the stop sentinel when
// walking the deprecated-versions list. Consider renaming in a follow-up.
function withArchivedRedirectsFile(version) {
  const candidate = semver.coerce(version).raw
  const sentinel = semver.coerce(lastVersionWithoutArchivedRedirectsFile).raw
  return semver.eq(candidate, sentinel)
}
55+
56+
// Warm getRemoteJSON's disk cache by fetching each deprecated release's
// archived redirects.json in parallel. Stops collecting URLs at the first
// release flagged by withArchivedRedirectsFile — presumably everything past
// that point has no archived redirects file (TODO confirm list ordering).
async function main() {
  const urls = []
  for (const version of deprecated) {
    if (withArchivedRedirectsFile(version)) break
    urls.push(version2url(version))
  }

  // A few retries and a short response timeout so CI doesn't hang on a
  // slow or flaky origin.
  const config = { retry: { limit: 3 }, timeout: { response: 1000 } }

  const label = `Time to fetch ${urls.length} URLs`
  console.time(label)
  await Promise.all(urls.map((url) => getRemoteJSON(url, config)))
  console.timeEnd(label)
}

0 commit comments

Comments (0)