initial commit

hexclave · aadesh18 · Mar 23, 2026 · Mar 23, 2026 · Mar 24, 2026 · Mar 24, 2026
commit 95ca0a29618677633a24c83724e52b4eacdee8b6
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 [![Stack Logo](/.github/assets/logo.png)](https://stack-auth.com)
 
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/stack-auth/stack-auth)
+
 <h3 align="center">
   <a href="https://docs.stack-auth.com">📘 Docs</a>
   | <a href="https://stack-auth.com/">☁️ Hosted Version</a>

diff --git a/apps/backend/.env b/apps/backend/.env
@@ -118,3 +118,8 @@ STACK_TELEGRAM_CHAT_ID=# enter your telegram chat id
 
 # Docs AI tool bundle
 STACK_DOCS_INTERNAL_BASE_URL=# override the docs origin used by the backend's AI tool bundle to call the docs app's `/api/internal/docs-tools` endpoint. Defaults to http://localhost:${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}04 in dev, https://mcp.stack-auth.com in prod
+
+# MCP review tool (SpacetimeDB)
+STACK_SPACETIMEDB_URI=# SpacetimeDB host URI (e.g. ws://localhost:8139); leave empty to disable MCP call logging and QA review persistence
+STACK_SPACETIMEDB_DB_NAME=# SpacetimeDB database name
+STACK_MCP_LOG_TOKEN=# shared secret gating the log_mcp_call reducer; must match EXPECTED_LOG_TOKEN in apps/internal-tool/spacetimedb/src/index.ts
diff --git a/apps/backend/.env.development b/apps/backend/.env.development
@@ -112,6 +112,11 @@ STACK_QSTASH_TOKEN=eyJVc2VySUQiOiJkZWZhdWx0VXNlciIsIlBhc3N3b3JkIjoiZGVmYXVsdFBhc
 STACK_QSTASH_CURRENT_SIGNING_KEY=sig_7kYjw48mhY7kAjqNGcy6cr29RJ6r
 STACK_QSTASH_NEXT_SIGNING_KEY=sig_5ZB6DVzB1wjE8S6rZ7eenA8Pdnhs
 
+# MCP review tool (SpacetimeDB)
+STACK_SPACETIMEDB_URI=ws://localhost:${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}39
+STACK_SPACETIMEDB_DB_NAME=stack-auth-llm
+STACK_MCP_LOG_TOKEN=change-me
+
 # Clickhouse
 STACK_CLICKHOUSE_URL=http://localhost:${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}36
 STACK_CLICKHOUSE_ADMIN_USER=stackframe

diff --git a/apps/backend/package.json b/apps/backend/package.json
@@ -55,6 +55,7 @@
   },
   "dependencies": {
     "@ai-sdk/mcp": "^1.0.21",
+    "spacetimedb": "^2.1.0",
     "@ai-sdk/openai": "^3.0.29",
     "@aws-sdk/client-s3": "^3.855.0",
     "@clickhouse/client": "^1.14.0",

diff --git a/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts b/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts
@@ -11,6 +11,10 @@ import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
 import { StatusError } from "@stackframe/stack-shared/dist/utils/errors";
 import { Json } from "@stackframe/stack-shared/dist/utils/json";
 import { generateText, ModelMessage, stepCountIs, streamText } from "ai";
+import { logMcpCall } from "@/lib/ai/mcp-logger";
+import { reviewMcpCall } from "@/lib/ai/qa-reviewer";
+import { getVerifiedQaContext } from "@/lib/ai/verified-qa";
+import { runAsynchronously } from "@stackframe/stack-shared/dist/utils/promises";
 
 export const POST = createSmartRouteHandler({
   metadata: {
@@ -61,10 +65,13 @@ export const POST = createSmartRouteHandler({
     }
 
     const model = selectModel(quality, speed, isAuthenticated);
-    const systemPrompt = getFullSystemPrompt(systemPromptId);
+    const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
+    let systemPrompt = getFullSystemPrompt(systemPromptId);
+    if (isDocsOrSearch) {
+      systemPrompt += await getVerifiedQaContext();
+    }
     const tools = await getTools(toolNames, { auth: fullReq.auth, targetProjectId: projectId });
     const toolsArg = Object.keys(tools).length > 0 ? tools : undefined;
-    const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
     const stepLimit = toolsArg == null ? 1 : isDocsOrSearch ? 50 : 5;
 
     if (mode === "stream") {
@@ -81,6 +88,7 @@ export const POST = createSmartRouteHandler({
         body: result.toUIMessageStreamResponse(),
       };
     } else {
+      const startedAt = Date.now();
       const controller = new AbortController();
       const timeoutId = setTimeout(() => controller.abort(), 120_000);
       const result = await generateText({
@@ -129,10 +137,49 @@ export const POST = createSmartRouteHandler({
         });
       });
 
+      let responseConversationId: string | undefined;
+      if (body.mcpCallMetadata != null) {
+        const correlationId = crypto.randomUUID();
+        const conversationId = body.mcpCallMetadata.conversationId ?? crypto.randomUUID();
+        responseConversationId = conversationId;
+        const firstUserMessage = messages.find(m => m.role === "user");
+        const question = typeof firstUserMessage?.content === "string"
+          ? firstUserMessage.content
+          : JSON.stringify(firstUserMessage?.content ?? "");
+
+        const innerToolCallsJson = JSON.stringify(contentBlocks.filter(b => b.type === "tool-call"));
+
+        runAsynchronously(logMcpCall({
+          correlationId,
+          toolName: body.mcpCallMetadata.toolName,
+          reason: body.mcpCallMetadata.reason,
+          userPrompt: body.mcpCallMetadata.userPrompt,
+          conversationId,
+          question,
+          response: result.text,
+          stepCount: result.steps.length,
+          innerToolCallsJson,
+          durationMs: BigInt(Date.now() - startedAt),
+          modelId: String(model.modelId),
+          errorMessage: undefined,
+        }));
+
+        runAsynchronously(reviewMcpCall({
+          correlationId,
+          question,
+          reason: body.mcpCallMetadata.reason,
+          response: result.text,
+        }));
+      }
+
       return {
         statusCode: 200,
         bodyType: "json" as const,
-        body: { content: contentBlocks, finalText: result.text },
+        body: {
+          content: contentBlocks,
+          finalText: result.text,
+          conversationId: responseConversationId ?? null,
+        },
       };
     }
   },

diff --git a/apps/backend/src/lib/ai/mcp-logger.ts b/apps/backend/src/lib/ai/mcp-logger.ts
@@ -0,0 +1,51 @@
+import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
+import { captureError } from "@stackframe/stack-shared/dist/utils/errors";
+import { DbConnection } from "./spacetimedb-bindings";
+import type { LogMcpCallParams } from "./spacetimedb-bindings/types/reducers";
+
+/** Payload accepted by `logMcpCall`: the reducer's parameters minus `token`, which `logMcpCall` supplies itself. */
+export type McpLogEntry = Omit<LogMcpCallParams, "token">;
+
+// Cached connection attempt shared by every caller in this module. Stays `null` until the first
+// `getConnection()` call and is reset to `null` when a connect attempt fails so a later call can retry.
+let connectionPromise: Promise<DbConnection> | null = null;
+
+/**
+ * Returns the module-wide SpacetimeDB connection, opening it on first use.
+ *
+ * Resolves to `null` when `STACK_SPACETIMEDB_URI` is empty (the feature is disabled). Otherwise it
+ * resolves once the `mcp_call_log` subscription has been applied. The attempt is cached in
+ * `connectionPromise`; the cache is cleared whenever the attempt rejects or the socket disconnects,
+ * so the next caller reconnects instead of reusing a dead connection or a permanently rejected promise.
+ *
+ * @returns the ready connection, or `null` when SpacetimeDB is not configured.
+ * @throws if connecting or subscribing fails, or if `STACK_SPACETIMEDB_DB_NAME` is not set.
+ */
+export async function getConnection(): Promise<DbConnection | null> {
+  const uri = getEnvVariable("STACK_SPACETIMEDB_URI", "");
+  if (!uri) {
+    return null;
+  }
+
+  if (!connectionPromise) {
+    // Explicit annotation: `pending` is referenced from callbacks inside its own initializer.
+    const pending: Promise<DbConnection> = new Promise<DbConnection>((resolve, reject) => {
+      DbConnection.builder()
+        .withUri(uri)
+        .withDatabaseName(getEnvVariable("STACK_SPACETIMEDB_DB_NAME"))
+        .onConnect((connInstance) => {
+          connInstance.subscriptionBuilder()
+            .onApplied(() => {
+              resolve(connInstance);
+            })
+            .onError(() => {
+              // Without this handler a failed subscription would leave every caller awaiting forever.
+              const err = new Error("SpacetimeDB subscription to mcp_call_log failed");
+              captureError("mcp-logger", err);
+              reject(err);
+            })
+            .subscribe("SELECT * FROM mcp_call_log");
+        })
+        .onConnectError((_: unknown, err: Error) => {
+          captureError("mcp-logger", err);
+          reject(err);
+        })
+        .onDisconnect(() => {
+          // Drop the cached connection so the next call reconnects. The identity check keeps a late
+          // disconnect of an old socket from clearing a newer attempt.
+          if (connectionPromise === pending) {
+            connectionPromise = null;
+          }
+          // No-op if the promise already resolved; otherwise unblocks callers still waiting.
+          reject(new Error("SpacetimeDB connection closed before it became ready"));
+        })
+        .build();
+    });
+    connectionPromise = pending;
+    // Covers every rejection path, including a synchronous throw inside the executor above
+    // (e.g. a missing env variable), which would otherwise stay cached as a rejected promise.
+    pending.catch(() => {
+      if (connectionPromise === pending) {
+        connectionPromise = null;
+      }
+    });
+  }
+
+  return await connectionPromise;
+}
+
+/**
+ * Records one MCP call by invoking the `logMcpCall` reducer.
+ *
+ * Silently does nothing when `getConnection()` yields no connection. The reducer token is read from
+ * `STACK_MCP_LOG_TOKEN` here, which is why callers pass an entry without it.
+ */
+export async function logMcpCall(entry: McpLogEntry): Promise<void> {
+  const connection = await getConnection();
+  if (connection === null) {
+    return;
+  }
+
+  const payload = { token: getEnvVariable("STACK_MCP_LOG_TOKEN"), ...entry };
+  await connection.reducers.logMcpCall(payload);
+}
diff --git a/apps/backend/src/lib/ai/prompts.ts b/apps/backend/src/lib/ai/prompts.ts
@@ -112,11 +112,12 @@ You are Stack Auth's AI assistant. You help users with Stack Auth - a complete a
 
 Think step by step about what to say. Being wrong is 100x worse than saying you don't know.
 
-## TOOL USAGE WORKFLOW:
-1. **FIRST**, use \`search_docs\` with relevant keywords to find related documentation
-2. **THEN**, use \`get_docs_by_id\` to retrieve the full content of the most relevant pages
-3. Base your answer on the actual documentation content retrieved
-4. When referring to API endpoints, **always cite the actual endpoint** (e.g., "GET /users/me") not the documentation URL
+## PRIORITY ORDER:
+1. **FIRST**, check the Human-Verified Knowledge Base (appended at the end of this prompt, if any). If the user's question matches or is similar to a verified Q&A, use that answer exactly — do not search docs or use any other source.
+2. **THEN**, use \`search_docs\` with relevant keywords to find related documentation
+3. **THEN**, use \`get_docs_by_id\` to retrieve the full content of the most relevant pages
+4. Base your answer on the actual documentation content retrieved
+5. When referring to API endpoints, **always cite the actual endpoint** (e.g., "GET /users/me") not the documentation URL
 
 ## CORE RESPONSIBILITIES:
 1. Help users implement Stack Auth in their applications

diff --git a/apps/backend/src/lib/ai/qa-reviewer.ts b/apps/backend/src/lib/ai/qa-reviewer.ts
@@ -0,0 +1,172 @@
+import { createMCPClient } from "@ai-sdk/mcp";
+import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
+import { captureError } from "@stackframe/stack-shared/dist/utils/errors";
+import { generateText, stepCountIs } from "ai";
+import { getConnection } from "./mcp-logger";
+import { createOpenRouterProvider } from "./models";
+import { getVerifiedQaContext } from "./verified-qa";
+
+// System prompt for the QA reviewer model. The JSON schema spelled out below is exactly what
+// `reviewMcpCall` extracts from the model's reply, so the field names here and there must stay in sync.
+// Verified Q&A context is appended to this prompt at call time.
+const QA_SYSTEM_PROMPT = `You are a QA reviewer for Stack Auth's AI documentation assistant.
+You will receive a question, the agent's stated reason for asking, and the AI's response.
+
+Your tasks:
+1. RELEVANCE: Does the response actually answer the question? Does the stated reason align with what was asked?
+2. CORRECTNESS: Verify factual claims about Stack Auth. Use human-verified Q&A (appended below, if any) as the highest-priority source of truth — these are always correct. Then use the available tools to look up additional information from the Stack Auth codebase. If the AI response contradicts a human-verified answer, flag it as incorrect.
+
+The repo name for all tool calls is "stack-auth/stack-auth". Only use the repository documentation tools (read_wiki_structure, read_wiki_contents, ask_question) — do not create sessions or modify any other resources.
+
+You MUST respond with ONLY valid JSON matching this exact schema (no markdown, no explanation outside the JSON):
+{
+  "needsHumanReview": boolean,
+  "answerCorrect": boolean,
+  "answerRelevant": boolean,
+  "flags": [{"type": string, "severity": "low" | "medium" | "high" | "critical", "explanation": string}],
+  "improvementSuggestions": string,
+  "overallScore": number
+}
+
+Flag types: "factual_error", "incomplete_answer", "off_topic", "hallucination", "outdated_info", "missing_context", "misleading", "reason_mismatch"
+
+Scoring:
+- 90-100: Excellent — factually correct, fully addresses the question
+- 70-89: Good — minor issues or missing details
+- 50-69: Acceptable — notable issues but core answer is present
+- 30-49: Poor — significant problems
+- 0-29: Unacceptable — fundamentally wrong or irrelevant
+
+Set needsHumanReview=true if: score < 50, any critical flag, or you are uncertain about correctness.`;
+
+// Model id handed to the OpenRouter provider for reviews; also stored as `qaReviewModelId` on each review.
+const REVIEW_MODEL_ID = "anthropic/claude-haiku-4.5";
+
+/** Shape of the JSON verdict the reviewer model is instructed to return. */
+type QaReviewVerdict = {
+  needsHumanReview: boolean,
+  answerCorrect: boolean,
+  answerRelevant: boolean,
+  flags: Array<{ type: string, severity: string, explanation: string }>,
+  improvementSuggestions: string,
+  overallScore: number,
+};
+
+/** QA fields written by the `updateMcpQaReview` reducer (besides token, correlation id and model id). */
+type QaReviewUpdate = {
+  qaNeedsHumanReview: boolean,
+  qaAnswerCorrect: boolean,
+  qaAnswerRelevant: boolean,
+  qaFlagsJson: string,
+  qaImprovementSuggestions: string,
+  qaOverallScore: number,
+  qaConversationJson: string | undefined,
+  qaErrorMessage: string | undefined,
+};
+
+/** Normalizes an arbitrary thrown value into an `Error` so it can be handed to `captureError`. */
+function toQaReviewerError(err: unknown): Error {
+  return err instanceof Error ? err : new Error(String(err));
+}
+
+/**
+ * Extracts the JSON object from the reviewer's reply and checks every field at runtime.
+ * Model output is untrusted, so a type assertion alone is not enough.
+ *
+ * @throws if no JSON object is present, the JSON is malformed, or any field has the wrong type.
+ */
+function parseQaReviewVerdict(text: string): QaReviewVerdict {
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) {
+    throw new Error("No JSON found in QA review response");
+  }
+
+  const raw: unknown = JSON.parse(jsonMatch[0]);
+  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
+    throw new Error("QA review response is not a JSON object");
+  }
+  const {
+    needsHumanReview,
+    answerCorrect,
+    answerRelevant,
+    flags,
+    improvementSuggestions,
+    overallScore,
+  } = raw as Record<string, unknown>;
+
+  if (
+    typeof needsHumanReview !== "boolean"
+    || typeof answerCorrect !== "boolean"
+    || typeof answerRelevant !== "boolean"
+  ) {
+    throw new Error("QA review response has non-boolean needsHumanReview/answerCorrect/answerRelevant");
+  }
+  if (typeof improvementSuggestions !== "string") {
+    throw new Error("QA review response has non-string improvementSuggestions");
+  }
+  if (typeof overallScore !== "number" || !Number.isFinite(overallScore)) {
+    throw new Error("QA review response has non-numeric overallScore");
+  }
+  if (!Array.isArray(flags)) {
+    throw new Error("QA review response has non-array flags");
+  }
+
+  const checkedFlags = flags.map((flag: unknown) => {
+    if (typeof flag !== "object" || flag === null) {
+      throw new Error("QA review response contains a malformed flag");
+    }
+    const { type, severity, explanation } = flag as Record<string, unknown>;
+    if (typeof type !== "string" || typeof severity !== "string" || typeof explanation !== "string") {
+      throw new Error("QA review response contains a malformed flag");
+    }
+    return { type, severity, explanation };
+  });
+
+  return {
+    needsHumanReview,
+    answerCorrect,
+    answerRelevant,
+    flags: checkedFlags,
+    improvementSuggestions,
+    overallScore,
+  };
+}
+
+/**
+ * Runs an LLM-based QA review of one MCP answer and stores the verdict via the
+ * `updateMcpQaReview` reducer, keyed by `entry.correlationId`.
+ *
+ * Does nothing when `STACK_OPENROUTER_API_KEY` is empty or `FORWARD_TO_PRODUCTION`. Any failure while
+ * producing the review (MCP client, model call, unparsable or invalid verdict) is reported through
+ * `captureError` and persisted as a failed review that needs human attention, so it is not dropped.
+ *
+ * @param entry the correlation id of the logged call plus the question, stated reason and AI response.
+ * @throws if `getConnection()` rejects or `STACK_MCP_LOG_TOKEN` is missing; reducer errors are captured.
+ */
+export async function reviewMcpCall(entry: {
+  correlationId: string;
+  question: string;
+  reason: string;
+  response: string;
+}): Promise<void> {
+  const apiKey = getEnvVariable("STACK_OPENROUTER_API_KEY", "");
+  if (!apiKey || apiKey === "FORWARD_TO_PRODUCTION") {
+    return;
+  }
+
+  let devinClient: Awaited<ReturnType<typeof createMCPClient>> | null = null;
+  let update: QaReviewUpdate;
+
+  try {
+    // Wait for the log row to be written first.
+    // NOTE(review): this is a fixed delay, not a guarantee that the row exists - confirm that the
+    // reducer tolerates an update for a correlation id it has not seen yet.
+    await new Promise(r => setTimeout(r, 3000));
+
+    devinClient = await createMCPClient({
+      transport: {
+        type: "http",
+        url: "https://mcp.deepwiki.com/mcp",
+      },
+    });
+
+    const devinTools = await devinClient.tools();
+    const openrouter = createOpenRouterProvider();
+    const model = openrouter(REVIEW_MODEL_ID);
+
+    const userMessage = [
+      "## Question",
+      entry.question,
+      "",
+      "## Agent's Reason for Asking",
+      entry.reason,
+      "",
+      "## AI Response",
+      entry.response,
+    ].join("\n");
+
+    const verifiedQa = await getVerifiedQaContext();
+
+    const result = await generateText({
+      model,
+      system: QA_SYSTEM_PROMPT + verifiedQa,
+      tools: devinTools as Parameters<typeof generateText>[0]["tools"],
+      stopWhen: stepCountIs(10),
+      messages: [{ role: "user", content: userMessage }],
+    });
+
+    // Compact transcript of the reviewer's own steps, stored next to the verdict.
+    const conversation = result.steps.map((step, i) => {
+      const toolCalls = step.toolCalls.map(tc => ({ toolName: tc.toolName, args: tc.input }));
+      const toolResults = step.toolResults.map(tr => ({
+        toolName: tr.toolName,
+        toolCallId: tr.toolCallId,
+        result: tr.output,
+      }));
+      return {
+        step: i + 1,
+        text: step.text || undefined,
+        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+        toolResults: toolResults.length > 0 ? toolResults : undefined,
+      };
+    });
+
+    const verdict = parseQaReviewVerdict(result.text);
+
+    update = {
+      qaNeedsHumanReview: verdict.needsHumanReview,
+      qaAnswerCorrect: verdict.answerCorrect,
+      qaAnswerRelevant: verdict.answerRelevant,
+      qaFlagsJson: JSON.stringify(verdict.flags),
+      qaImprovementSuggestions: verdict.improvementSuggestions,
+      qaOverallScore: verdict.overallScore,
+      qaConversationJson: JSON.stringify(conversation),
+      qaErrorMessage: undefined,
+    };
+  } catch (err) {
+    captureError("qa-reviewer", toQaReviewerError(err));
+    // Persist the failure as a review that needs a human instead of leaving the row unreviewed.
+    update = {
+      qaNeedsHumanReview: true,
+      qaAnswerCorrect: false,
+      qaAnswerRelevant: false,
+      qaFlagsJson: "[]",
+      qaImprovementSuggestions: "",
+      qaOverallScore: 0,
+      qaConversationJson: undefined,
+      qaErrorMessage: String(err),
+    };
+  } finally {
+    // Always release the MCP client, whichever path was taken above.
+    if (devinClient) {
+      try {
+        await devinClient.close();
+      } catch (err) {
+        captureError("qa-reviewer", toQaReviewerError(err));
+      }
+    }
+  }
+
+  const conn = await getConnection();
+  if (!conn) return;
+  const token = getEnvVariable("STACK_MCP_LOG_TOKEN");
+  try {
+    await conn.reducers.updateMcpQaReview({
+      token,
+      correlationId: entry.correlationId,
+      qaReviewModelId: REVIEW_MODEL_ID,
+      ...update,
+    });
+  } catch (err) {
+    captureError("qa-reviewer", toQaReviewerError(err));
+  }
+}
diff --git a/apps/backend/src/lib/ai/schema.ts b/apps/backend/src/lib/ai/schema.ts
@@ -24,6 +24,12 @@ export const requestBodySchema = yupObject({
     }).defined()
   ).defined().min(1),
   projectId: yupString().optional().nullable(),
+  mcpCallMetadata: yupObject({
+    toolName: yupString().defined(),
+    reason: yupString().defined(),
+    userPrompt: yupString().defined(),
+    conversationId: yupString().optional().nullable(),
+  }).optional().nullable(),
 });
 
 export type RequestBody = InferType<typeof requestBodySchema>;
diff --git a/apps/backend/src/lib/ai/spacetimedb-bindings/add_manual_qa_reducer.ts b/apps/backend/src/lib/ai/spacetimedb-bindings/add_manual_qa_reducer.ts
@@ -0,0 +1,18 @@
+// THIS FILE IS AUTOMATICALLY GENERATED BY SPACETIMEDB. EDITS TO THIS FILE
+// WILL NOT BE SAVED. MODIFY TABLES IN YOUR MODULE SOURCE CODE INSTEAD.
+
+/* eslint-disable */
+/* tslint:disable */
+import {
+  TypeBuilder as __TypeBuilder,
+  t as __t,
+  type AlgebraicTypeType as __AlgebraicTypeType,
+  type Infer as __Infer,
+} from "spacetimedb";
+
+export default {
+  question: __t.string(),
+  answer: __t.string(),
+  publish: __t.bool(),
+  reviewedBy: __t.string(),
+};