Skip to content

Commit 8d417c6

Browse files
mcroomp and kristofr authored
added diag (#34)
Co-authored-by: kristofr <kristofr+odspmdb@microsoft.com>
1 parent f7fdbf9 commit 8d417c6

2 files changed

Lines changed: 219 additions & 3 deletions

File tree

scripts/entrypoint.sh

Lines changed: 200 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
# Combine with --background [--disown].
1111
# --log [--indexer|--error] [-n N]
1212
# Tail the server, indexer, or error log.
13+
# --diag Run startup diagnostics and print what
14+
# is wrong (binary, config, ports, locks,
15+
# data dir, HTTP health). Exits 0 if OK,
16+
# 1 if any check failed.
1317
#
1418
# Docker mode (no flags):
1519
# Run Typesense in the foreground; process supervisor keeps container alive.
@@ -30,6 +34,7 @@ LOG=0
3034
LOG_INDEXER=0
3135
LOG_ERROR=0
3236
LOG_LINES=40
37+
DIAG=0
3338

3439
while [[ $# -gt 0 ]]; do
3540
case "$1" in
@@ -40,6 +45,7 @@ while [[ $# -gt 0 ]]; do
4045
--log) LOG=1 ;;
4146
--indexer) LOG_INDEXER=1 ;;
4247
--error) LOG_ERROR=1 ;;
48+
--diag) DIAG=1 ;;
4349
-n) shift; LOG_LINES="${1:-40}" ;;
4450
*) echo "[entrypoint] Unknown flag: $1" >&2; exit 1 ;;
4551
esac
@@ -71,7 +77,10 @@ _check_running() {
7177
[ -f "$pid_file" ] || return 1
7278
pid=$(cat "$pid_file" 2>/dev/null) || return 1
7379
[[ "$pid" =~ ^[0-9]+$ ]] || return 1
74-
kill -0 "$pid" 2>/dev/null
80+
kill -0 "$pid" 2>/dev/null || return 1
81+
# Verify the live process is actually typesense-server (prevents stale PID
82+
# collisions after WSL restart, where the PID may be reused by a different process).
83+
grep -ql "typesense" /proc/"$pid"/cmdline 2>/dev/null
7584
}
7685

7786
# ── Helper: stop a running Typesense instance ─────────────────────────────────
@@ -102,6 +111,196 @@ _stop_typesense() {
102111
fi
103112
}
104113

114+
# ── Diag: check what might prevent Typesense from starting ────────────────────
115+
116+
if [ "$DIAG" = "1" ]; then
117+
set +e
118+
_PASS=0; _WARN=0; _FAIL=0
119+
_ok() { printf ' [OK] %s\n' "$*"; _PASS=$((_PASS+1)); }
120+
_warn() { printf ' [~~] %s\n' "$*"; _WARN=$((_WARN+1)); }
121+
_fail() { printf ' [!!] %s\n' "$*"; _FAIL=$((_FAIL+1)); }
122+
_info() { printf ' %s\n' "$*"; }
123+
_head() { printf '\n[diag] %s\n' "$*"; }
124+
125+
printf '[diag] ── Typesense startup diagnostics ─────────────────────────────\n'
126+
printf '[diag] data dir : %s\n' "$TYPESENSE_DATA"
127+
printf '[diag] config : %s\n' "$CONFIG_FILE"
128+
printf '[diag] python : %s\n' "$PYTHON3"
129+
130+
# ── 1. Binary ──────────────────────────────────────────────────────────
131+
_head "1/7 Binary"
132+
_DIAG_BIN=""
133+
for _cand in \
134+
"${TYPESENSE_DIR:+${TYPESENSE_DIR}/typesense-server}" \
135+
"${TYPESENSE_DATA}/typesense-server" \
136+
"${HOME}/.local/typesense/typesense-server"
137+
do
138+
[ -n "$_cand" ] && [ -x "$_cand" ] && { _DIAG_BIN="$_cand"; break; }
139+
done
140+
if [ -z "$_DIAG_BIN" ]; then
141+
_fail "typesense-server not found or not executable"
142+
_info "Checked: \${TYPESENSE_DATA}/typesense-server, ~/.local/typesense/typesense-server"
143+
_info "Fix: run setup.cmd (installs the binary)"
144+
else
145+
_VER=$("$_DIAG_BIN" --version 2>&1 | grep -oE 'Typesense [0-9.]+' | head -1)
146+
_ok "$_DIAG_BIN ${_VER:-(version unknown)}"
147+
fi
148+
149+
# ── 2. Config ──────────────────────────────────────────────────────────
150+
_head "2/7 Config"
151+
_DIAG_PORT=""
152+
_DIAG_KEY=""
153+
if [ ! -f "$CONFIG_FILE" ]; then
154+
_fail "Not found: $CONFIG_FILE"
155+
_info "Fix: run setup.cmd to create config.json"
156+
elif [ ! -s "$CONFIG_FILE" ]; then
157+
_fail "File is empty: $CONFIG_FILE"
158+
else
159+
_DIAG_PORT=$("$PYTHON3" -c \
160+
"import json,sys; d=json.load(open('$CONFIG_FILE')); print(d['port'])" 2>/dev/null)
161+
_DIAG_KEY=$("$PYTHON3" -c \
162+
"import json; d=json.load(open('$CONFIG_FILE')); print(d.get('api_key',''))" 2>/dev/null)
163+
if [ -z "$_DIAG_PORT" ]; then
164+
_fail "Could not read 'port' from config (missing key or Python error)"
165+
_info "Config : $CONFIG_FILE"
166+
_info "Python : $PYTHON3"
167+
"$PYTHON3" -c "import json" 2>/dev/null \
168+
|| _info "(Python interpreter may be missing or broken)"
169+
else
170+
_ok "$CONFIG_FILE (port=$_DIAG_PORT key=${_DIAG_KEY:0:8}...)"
171+
[ -z "$_DIAG_KEY" ] && _warn "api_key is empty in config"
172+
fi
173+
fi
174+
_DIAG_PORT="${_DIAG_PORT:-${CODESEARCH_PORT}}"
175+
176+
# ── 3. Data directory ──────────────────────────────────────────────────
177+
_head "3/7 Data directory"
178+
if [ ! -e "$TYPESENSE_DATA" ]; then
179+
_warn "Not found: $TYPESENSE_DATA (will be created on first start)"
180+
elif [ ! -d "$TYPESENSE_DATA" ]; then
181+
_fail "$TYPESENSE_DATA exists but is not a directory"
182+
else
183+
_ok "$TYPESENSE_DATA"
184+
if [ ! -w "$TYPESENSE_DATA" ]; then
185+
_fail "Not writable — check permissions"
186+
_info "Owner: $(stat -c '%U:%G mode %a' "$TYPESENSE_DATA" 2>/dev/null)"
187+
_info "Fix: sudo chown -R \$USER '$TYPESENSE_DATA'"
188+
fi
189+
_DF=$(df -m "$TYPESENSE_DATA" 2>/dev/null | awk 'NR==2{print $4}')
190+
if [ -n "$_DF" ]; then
191+
if [ "$_DF" -lt 200 ]; then _fail "Very low disk space: ${_DF} MB free"
192+
elif [ "$_DF" -lt 1024 ]; then _warn "Low disk space: ${_DF} MB free"
193+
else _info "Disk free: ${_DF} MB"
194+
fi
195+
fi
196+
fi
197+
198+
# ── 4. RocksDB lock ────────────────────────────────────────────────────
199+
_head "4/7 RocksDB lock"
200+
_LOCK="${TYPESENSE_DATA}/data/db/LOCK"
201+
if [ ! -f "$_LOCK" ]; then
202+
_ok "No lock file"
203+
else
204+
_LOCK_HOLDER=""
205+
if command -v lsof &>/dev/null; then
206+
_LOCK_HOLDER=$(lsof "$_LOCK" 2>/dev/null | awk 'NR>1{print $1,"(pid "$2")"}' | head -1)
207+
elif command -v fuser &>/dev/null; then
208+
_LOCK_HOLDER=$(fuser "$_LOCK" 2>/dev/null)
209+
fi
210+
if [ -n "$_LOCK_HOLDER" ]; then
211+
_ok "Lock held by: $_LOCK_HOLDER (Typesense is running)"
212+
else
213+
_fail "Stale lock file: $_LOCK"
214+
_info "This will prevent Typesense from starting."
215+
_info "Fix: rm '$_LOCK'"
216+
fi
217+
fi
218+
219+
# ── 5. PID file and process ────────────────────────────────────────────
220+
_head "5/7 Process"
221+
if [ ! -f "$TYPESENSE_PID_FILE" ]; then
222+
_info "No PID file — Typesense is not running (or was cleanly stopped)"
223+
else
224+
_DIAG_PID=$(cat "$TYPESENSE_PID_FILE" 2>/dev/null)
225+
if [[ "$_DIAG_PID" =~ ^[0-9]+$ ]]; then
226+
if kill -0 "$_DIAG_PID" 2>/dev/null; then
227+
if grep -ql "typesense" /proc/"$_DIAG_PID"/cmdline 2>/dev/null; then
228+
_ok "Typesense running (pid=$_DIAG_PID)"
229+
else
230+
_CMD=$(tr '\0' ' ' </proc/"$_DIAG_PID"/cmdline 2>/dev/null | cut -c1-60)
231+
_fail "PID $_DIAG_PID is alive but is NOT typesense-server: $_CMD"
232+
_info "Stale PID file from a previous WSL session."
233+
_info "Fix: rm '$TYPESENSE_PID_FILE'"
234+
fi
235+
else
236+
_fail "Stale PID file: pid=$_DIAG_PID is not running"
237+
_info "Typesense crashed or was killed."
238+
_info "Fix: rm '$TYPESENSE_PID_FILE' (or just run: ts start)"
239+
fi
240+
else
241+
_fail "PID file contains invalid value: '$_DIAG_PID'"
242+
_info "Fix: rm '$TYPESENSE_PID_FILE'"
243+
fi
244+
fi
245+
246+
# ── 6. Port ────────────────────────────────────────────────────────────
247+
_head "6/7 Port"
248+
_PORT_HOLDER=""
249+
if command -v ss &>/dev/null; then
250+
_PORT_HOLDER=$(ss -tlnp 2>/dev/null \
251+
| awk -v p=":${_DIAG_PORT} " '$0 ~ p || $0 ~ p"$" {print $NF}' \
252+
| grep -oP 'pid=\K[0-9]+' | head -1)
253+
fi
254+
if [ -n "$_PORT_HOLDER" ]; then
255+
_PNAME=$(cat /proc/"$_PORT_HOLDER"/comm 2>/dev/null || echo "?")
256+
_ok "Port ${_DIAG_PORT} in use by: $_PNAME (pid=$_PORT_HOLDER)"
257+
else
258+
_info "Port ${_DIAG_PORT} is not in use — Typesense is not listening"
259+
fi
260+
261+
# ── 7. HTTP health ─────────────────────────────────────────────────────
262+
_head "7/7 HTTP health"
263+
_HEALTH_URL="http://127.0.0.1:${_DIAG_PORT}/health"
264+
if command -v curl &>/dev/null; then
265+
_HTTP_CODE=$(curl -s -o /tmp/_ts_diag_body \
266+
-w '%{http_code}' --max-time 3 "$_HEALTH_URL" 2>/dev/null)
267+
_HTTP_BODY=$(cat /tmp/_ts_diag_body 2>/dev/null)
268+
rm -f /tmp/_ts_diag_body
269+
case "$_HTTP_CODE" in
270+
200) _ok "HTTP 200 $_HEALTH_URL$_HTTP_BODY" ;;
271+
503) _warn "HTTP 503 $_HEALTH_URL → still loading ($_HTTP_BODY)" ;;
272+
000|"") _fail "No response from $_HEALTH_URL — Typesense is not running" ;;
273+
*) _warn "HTTP $_HTTP_CODE $_HEALTH_URL$_HTTP_BODY" ;;
274+
esac
275+
else
276+
_info "(curl not available — skipping HTTP check)"
277+
fi
278+
279+
# ── Recent log ─────────────────────────────────────────────────────────
280+
printf '\n[diag] Recent log (%s)\n' "$TYPESENSE_LOG"
281+
if [ -f "$TYPESENSE_LOG" ] && [ -s "$TYPESENSE_LOG" ]; then
282+
tail -15 "$TYPESENSE_LOG" | while IFS= read -r _line; do printf ' %s\n' "$_line"; done
283+
elif [ -f "$TYPESENSE_LOG" ]; then
284+
printf ' (empty — Typesense may have crashed before writing anything)\n'
285+
printf ' Check: binary missing? port conflict? RocksDB lock? (see checks above)\n'
286+
else
287+
printf ' (no log file yet)\n'
288+
fi
289+
290+
# ── Summary ────────────────────────────────────────────────────────────
291+
printf '\n[diag] ─────────────────────────────────────────────────────────────\n'
292+
printf '[diag] %d passed %d warnings %d failed\n' "$_PASS" "$_WARN" "$_FAIL"
293+
if [ "$_FAIL" -gt 0 ]; then
294+
printf '[diag] Fix the issues above, then run: ts start\n'
295+
exit 1
296+
elif [ "$_WARN" -gt 0 ]; then
297+
printf '[diag] Ready to start (with warnings).\n'
298+
else
299+
printf '[diag] Everything looks good.\n'
300+
fi
301+
exit 0
302+
fi
303+
105304
# ── Stop command ──────────────────────────────────────────────────────────────
106305

107306
if [ "$STOP" = "1" ]; then

ts.mjs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
* root --remove NAME Remove a root from config.json
2222
* build Docker only: build the Docker image
2323
* setup Docker: build image if needed + start container
24+
* diag WSL only: run startup diagnostics (binary, config,
25+
* port, RocksDB lock, HTTP health)
2426
*/
2527

2628
import fs from 'fs';
@@ -551,12 +553,17 @@ async function cmdStop() {
551553
}
552554

553555
async function cmdRestart() {
554-
// Restart only the management daemon — Typesense keeps running.
556+
// Restart the management daemon. Typesense is left running if it is already up.
557+
// In WSL mode we also ensure Typesense is started if it went down (e.g. after a
558+
// WSL session restart), so the new daemon does not get stuck in its loading loop.
555559
await shutdownDaemon();
560+
if (MODE === 'wsl') {
561+
wslRun(['--background', '--disown']);
562+
}
556563
startTsqueryDaemon();
557564
log(`Waiting for management API on port ${API_PORT}...`);
558565
await pollHealth(API_PORT, 30_000, 'management API');
559-
log('Daemon restarted. Typesense was not restarted.');
566+
log('Daemon restarted.');
560567
}
561568

562569
async function cmdStatus() {
@@ -691,6 +698,14 @@ function cmdLog(args) {
691698
docker(dockerArgs);
692699
}
693700

701+
async function cmdDiag() {
702+
if (MODE !== 'wsl') {
703+
log('diag is only available in WSL mode.');
704+
return;
705+
}
706+
wslRun(['--diag']);
707+
}
708+
694709
function cmdBuild() {
695710
const dockerfile = path.join(__dirname, 'docker', 'Dockerfile');
696711
if (!fs.existsSync(dockerfile)) die(`Dockerfile not found: ${dockerfile}`);
@@ -795,6 +810,7 @@ Commands:
795810
root --remove NAME Remove a root from config.json
796811
build Docker only: build the Docker image
797812
setup Build image if needed, then start
813+
diag WSL only: diagnose startup problems
798814
`.trim());
799815
process.exit(0);
800816
}
@@ -859,6 +875,7 @@ const commands = {
859875
root: cmdRoot,
860876
build: () => cmdBuild(),
861877
setup: cmdSetup,
878+
diag: cmdDiag,
862879
};
863880

864881
if (!commands[args.cmd]) {

0 commit comments

Comments
 (0)