Skip to content

Commit db52a7e

Browse files
author
Peter Bengtsson
authored
dry-run with scraping and elasticsearch (github#31201)
1 parent ff533e4 commit db52a7e

4 files changed

Lines changed: 62 additions & 16 deletions

File tree

.github/workflows/dry-run-elasticsearch-indexing.yml

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,57 @@ jobs:
4646
node-version: 16.15.x
4747
cache: npm
4848

49-
- name: Install
49+
- name: Install dependencies
5050
run: npm ci
5151

52+
- name: Cache nextjs build
53+
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
54+
with:
55+
path: .next/cache
56+
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
57+
58+
- name: Run build scripts
59+
run: npm run build
60+
61+
- name: Start the server in the background
62+
env:
63+
ENABLE_DEV_LOGGING: false
64+
run: |
65+
npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &
66+
67+
# First, sleep to give the server a chance to start
68+
sleep 6
69+
curl --retry-connrefused --retry 4 -I http://localhost:4002/
70+
71+
- if: ${{ failure() }}
72+
name: Debug server outputs on errors
73+
run: |
74+
echo "____STDOUT____"
75+
cat /tmp/stdout.log
76+
echo "____STDERR____"
77+
cat /tmp/stderr.log
78+
79+
- name: Scrape records into a temp directory
80+
env:
81+
# If a reusable, or anything in the `data/*` directory is deleted
82+
# you might get a
83+
#
84+
# RenderError: Can't find the key 'site.data.reusables...' in the scope
85+
#
86+
# But that'll get fixed in the next translation pipeline. For now,
87+
# let's just accept an empty string instead.
88+
THROW_ON_EMPTY: false
89+
90+
run: |
91+
mkdir /tmp/records
92+
npm run sync-search-indices -- \
93+
--language en \
94+
--version dotcom \
95+
--out-directory /tmp/records \
96+
--no-compression --no-lunr-index
97+
98+
ls -lh /tmp/records
99+
52100
# Serves two purposes:
53101
# 1. Be confident that the Elasticsearch server start-up worked at all
54102
# 2. Sometimes Elasticsearch will bind to the port but still not
@@ -62,8 +110,8 @@ jobs:
62110
ELASTICSEARCH_URL: 'http://localhost:9200'
63111
run: |
64112
./script/search/index-elasticsearch.js --verbose \
65-
-l en -l ja \
66-
-V dotcom -V ghes-3.5
113+
-l en \
114+
-V dotcom -- /tmp/records
67115
68116
- name: Show created indexes and aliases
69117
run: |

.github/workflows/sync-search-elasticsearch.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,7 @@ jobs:
104104
- name: Index into Elasticsearch
105105
run: |
106106
./script/search/index-elasticsearch.js \
107-
--language ${{ matrix.language }} \
108-
--source-directory /tmp/records
107+
--language ${{ matrix.language }} -- /tmp/records
109108
110109
- name: Check created indexes and aliases
111110
run: |

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@
182182
"build": "next build",
183183
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES='en,ja' nodemon --inspect server.js",
184184
"dev": "cross-env npm start",
185-
"index-test-fixtures": "node script/search/index-elasticsearch.js -s tests/content/fixtures/search-indexes -l en -V ghae -V dotcom --index-prefix tests",
185+
"index-test-fixtures": "node script/search/index-elasticsearch.js -l en -V ghae -V dotcom --index-prefix tests -- tests/content/fixtures/search-indexes",
186186
"lint": "eslint '**/*.{js,mjs,ts,tsx}'",
187187
"lint-translation": "cross-env NODE_OPTIONS=--experimental-vm-modules TEST_TRANSLATION=true jest tests/linting/lint-files.js",
188188
"prepare": "husky install",

script/search/index-elasticsearch.js

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,29 +49,28 @@ const shortNames = Object.fromEntries(
4949

5050
const allVersionKeys = Object.keys(shortNames)
5151

52-
const DEFAULT_SOURCE_DIRECTORY = path.join('lib', 'search', 'indexes')
53-
5452
program
5553
.description('Creates Elasticsearch index from records')
5654
.option('-v, --verbose', 'Verbose outputs')
57-
.addOption(new Option('-V, --version <VERSION...>', 'Specific versions').choices(allVersionKeys))
55+
.addOption(new Option('-V, --version [VERSION...]', 'Specific versions').choices(allVersionKeys))
5856
.addOption(
5957
new Option('-l, --language <LANGUAGE...>', 'Which languages to focus on').choices(languageKeys)
6058
)
6159
.addOption(
6260
new Option('--not-language <LANGUAGE...>', 'Specific language to omit').choices(languageKeys)
6361
)
6462
.option('-u, --elasticsearch-url <url>', 'If different from $ELASTICSEARCH_URL')
65-
.option(
66-
'-s, --source-directory <DIRECTORY>',
67-
`Directory where records files are (default ${DEFAULT_SOURCE_DIRECTORY})`
68-
)
6963
.option('-p, --index-prefix <prefix>', 'Index string to put before index name')
64+
.argument('<source-directory>', 'where the indexable files are')
7065
.parse(process.argv)
7166

72-
main(program.opts())
67+
main(program.opts(), program.args)
68+
69+
async function main(opts, args) {
70+
if (!args.length) {
71+
throw new Error('Must pass the source as the first argument')
72+
}
7373

74-
async function main(opts) {
7574
if (!opts.elasticsearchUrl && !process.env.ELASTICSEARCH_URL) {
7675
throw new Error(
7776
'Must passed the elasticsearch URL option or ' +
@@ -103,7 +102,7 @@ async function main(opts) {
103102
if (verbose) {
104103
console.log(`Connecting to ${chalk.bold(safeUrlDisplay(node))}`)
105104
}
106-
const sourceDirectory = opts.sourceDirectory || DEFAULT_SOURCE_DIRECTORY
105+
const sourceDirectory = args[0]
107106
try {
108107
await fs.stat(sourceDirectory)
109108
} catch (error) {

0 commit comments

Comments
 (0)