From 2c6662c361d1b7c69c414bdf45a15c8170c43b38 Mon Sep 17 00:00:00 2001 From: Emir Karabeg Date: Tue, 14 Apr 2026 19:45:00 -0700 Subject: [PATCH 1/5] improvement(seo): optimize sitemaps and robots.txt across sim and docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add missing pages to sim sitemap: blog author pages, academy catalog and course pages - Fix 6x duplicate URL bug in docs sitemap by deduplicating with source.getLanguages() - Convert docs sitemap from route handler to Next.js metadata convention with native hreflang - Add x-default hreflang alternate for docs multi-language pages - Remove changeFrequency and priority fields (Google ignores both) - Fix inaccurate lastModified timestamps — derive from real content dates, omit when unknown - Consolidate 20+ redundant per-bot robots rules into single wildcard entry - Add /form/ and /credential-account/ to sim robots disallow list - Reference image sitemap in sim robots.txt - Remove deprecated host directive from sim robots - Move disallow rules before allow in docs robots for crawler compatibility - Extract hardcoded docs baseUrl to env variable with production fallback --- apps/docs/app/robots.txt/route.ts | 37 +++----- apps/docs/app/sitemap.ts | 43 +++++++++ apps/docs/app/sitemap.xml/route.ts | 62 ------------- apps/sim/app/robots.ts | 136 ++++------------------------- apps/sim/app/sitemap.ts | 126 ++++++++++++++------------ 5 files changed, 141 insertions(+), 263 deletions(-) create mode 100644 apps/docs/app/sitemap.ts delete mode 100644 apps/docs/app/sitemap.xml/route.ts diff --git a/apps/docs/app/robots.txt/route.ts b/apps/docs/app/robots.txt/route.ts index 92c77f5bf1..3ec012fe56 100644 --- a/apps/docs/app/robots.txt/route.ts +++ b/apps/docs/app/robots.txt/route.ts @@ -1,12 +1,22 @@ +const DOCS_BASE_URL = process.env.NEXT_PUBLIC_DOCS_URL ?? 'https://docs.sim.ai' + export const revalidate = false export async function GET() { - const baseUrl = 'https://docs.sim.ai' + const baseUrl = DOCS_BASE_URL const robotsTxt = `# Robots.txt for Sim Documentation User-agent: * +Disallow: /.next/ +Disallow: /api/internal/ +Disallow: /_next/static/ +Disallow: /admin/ Allow: / +Allow: /api/search +Allow: /llms.txt +Allow: /llms-full.txt +Allow: /llms.mdx/ # Search engine crawlers User-agent: Googlebot @@ -58,38 +68,15 @@ Allow: / User-agent: cohere-ai Allow: / -# Disallow admin and internal paths (if any exist) -Disallow: /.next/ -Disallow: /api/internal/ -Disallow: /_next/static/ -Disallow: /admin/ - -# Allow but don't prioritize these -Allow: /api/search -Allow: /llms.txt -Allow: /llms-full.txt -Allow: /llms.mdx/ - # Sitemaps Sitemap: ${baseUrl}/sitemap.xml -# Crawl delay for aggressive bots (optional) -# Crawl-delay: 1 - # Additional resources for AI indexing # See https://github.com/AnswerDotAI/llms-txt for more info # LLM-friendly content: # Manifest: ${baseUrl}/llms.txt # Full content: ${baseUrl}/llms-full.txt -# Individual pages: ${baseUrl}/llms.mdx/[page-path] - -# Multi-language documentation available at: -# ${baseUrl}/en - English -# ${baseUrl}/es - Español -# ${baseUrl}/fr - Français -# ${baseUrl}/de - Deutsch -# ${baseUrl}/ja - 日本語 -# ${baseUrl}/zh - 简体中文` +# Individual pages: ${baseUrl}/llms.mdx/[page-path]` return new Response(robotsTxt, { headers: { diff --git a/apps/docs/app/sitemap.ts b/apps/docs/app/sitemap.ts new file mode 100644 index 0000000000..f2333b886b --- /dev/null +++ b/apps/docs/app/sitemap.ts @@ -0,0 +1,43 @@ +import type { MetadataRoute } from 'next' +import { i18n } from '@/lib/i18n' +import { source } from '@/lib/source' + +export const revalidate = 3600 + +const DOCS_BASE_URL = process.env.NEXT_PUBLIC_DOCS_URL ?? 'https://docs.sim.ai' + +export default function sitemap(): MetadataRoute.Sitemap { + const baseUrl = DOCS_BASE_URL + const languages = source.getLanguages() + + const pagesBySlug = new Map>() + for (const { language, pages } of languages) { + for (const page of pages) { + const key = page.slugs.join('/') + if (!pagesBySlug.has(key)) { + pagesBySlug.set(key, new Map()) + } + pagesBySlug.get(key)!.set(language, `${baseUrl}${page.url}`) + } + } + + const entries: MetadataRoute.Sitemap = [] + for (const [, localeMap] of pagesBySlug) { + const defaultUrl = localeMap.get(i18n.defaultLanguage) + if (!defaultUrl) continue + + const langAlternates: Record = {} + for (const [lang, url] of localeMap) { + langAlternates[lang] = url + } + + langAlternates['x-default'] = defaultUrl + + entries.push({ + url: defaultUrl, + alternates: { languages: langAlternates }, + }) + } + + return entries +} diff --git a/apps/docs/app/sitemap.xml/route.ts b/apps/docs/app/sitemap.xml/route.ts deleted file mode 100644 index 8bfdc54dfb..0000000000 --- a/apps/docs/app/sitemap.xml/route.ts +++ /dev/null @@ -1,62 +0,0 @@ -import { i18n } from '@/lib/i18n' -import { source } from '@/lib/source' - -export const revalidate = 3600 - -export async function GET() { - const baseUrl = 'https://docs.sim.ai' - - const allPages = source.getPages() - - const getPriority = (url: string): string => { - if (url === '/introduction' || url === '/') return '1.0' - if (url === '/getting-started') return '0.9' - if (url.match(/^\/[^/]+$/)) return '0.8' - if (url.includes('/sdks/') || url.includes('/tools/')) return '0.7' - return '0.6' - } - - const urls = allPages - .flatMap((page) => { - const urlWithoutLang = page.url.replace(/^\/[a-z]{2}\//, '/') - - return i18n.languages.map((lang) => { - const url = - lang === i18n.defaultLanguage - ? `${baseUrl}${urlWithoutLang}` - : `${baseUrl}/${lang}${urlWithoutLang}` - - return ` - ${url} - ${getPriority(urlWithoutLang)} - ${i18n.languages.length > 1 ? generateAlternateLinks(baseUrl, urlWithoutLang) : ''} - ` - }) - }) - .join('\n') - - const sitemap = ` - -${urls} -` - - return new Response(sitemap, { - headers: { - 'Content-Type': 'application/xml', - 'Cache-Control': 'public, max-age=3600, s-maxage=3600', - }, - }) -} - -function generateAlternateLinks(baseUrl: string, urlWithoutLang: string): string { - const langLinks = i18n.languages - .map((lang) => { - const url = - lang === i18n.defaultLanguage - ? `${baseUrl}${urlWithoutLang}` - : `${baseUrl}/${lang}${urlWithoutLang}` - return ` ` - }) - .join('\n') - return `${langLinks}\n ` -} diff --git a/apps/sim/app/robots.ts b/apps/sim/app/robots.ts index 0a1a8929a3..d9a7096e2c 100644 --- a/apps/sim/app/robots.ts +++ b/apps/sim/app/robots.ts @@ -4,133 +4,27 @@ import { getBaseUrl } from '@/lib/core/utils/urls' export default function robots(): MetadataRoute.Robots { const baseUrl = getBaseUrl() - const disallowedPaths = [ - '/api/', - '/workspace/', - '/chat/', - '/playground/', - '/resume/', - '/invite/', - '/unsubscribe/', - '/w/', - '/_next/', - '/private/', - ] - return { rules: [ { userAgent: '*', allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Googlebot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Bingbot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'YandexBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Baiduspider', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'GPTBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'ChatGPT-User', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'OAI-SearchBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'ClaudeBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Claude-SearchBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Google-Extended', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'PerplexityBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Meta-ExternalAgent', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'FacebookBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Applebot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Applebot-Extended', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Amazonbot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Bytespider', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'CCBot', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'cohere-ai', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'Grok-web-crawl', - allow: '/', - disallow: disallowedPaths, - }, - { - userAgent: 'DeepSeek-AI', - allow: '/', - disallow: disallowedPaths, + disallow: [ + '/api/', + '/workspace/', + '/chat/', + '/playground/', + '/resume/', + '/invite/', + '/unsubscribe/', + '/w/', + '/form/', + '/credential-account/', + '/_next/', + '/private/', + ], }, ], - sitemap: `${baseUrl}/sitemap.xml`, - host: baseUrl, + sitemap: [`${baseUrl}/sitemap.xml`, `${baseUrl}/blog/sitemap-images.xml`], } } diff --git a/apps/sim/app/sitemap.ts b/apps/sim/app/sitemap.ts index 32cf3f27be..42fccdd0f1 100644 --- a/apps/sim/app/sitemap.ts +++ b/apps/sim/app/sitemap.ts @@ -1,4 +1,5 @@ import type { MetadataRoute } from 'next' +import { COURSES } from '@/lib/academy/content' import { getAllPostMeta } from '@/lib/blog/registry' import { getBaseUrl } from '@/lib/core/utils/urls' import integrations from '@/app/(landing)/integrations/data/integrations.json' @@ -6,69 +7,48 @@ import { ALL_CATALOG_MODELS, MODEL_PROVIDERS_WITH_CATALOGS } from '@/app/(landin export default async function sitemap(): Promise { const baseUrl = getBaseUrl() + const posts = await getAllPostMeta() - const now = new Date() - const integrationPages: MetadataRoute.Sitemap = integrations.map((integration) => ({ - url: `${baseUrl}/integrations/${integration.slug}`, - lastModified: now, - changeFrequency: 'monthly', - priority: 0.6, - })) - const modelHubPages: MetadataRoute.Sitemap = [ - { - url: `${baseUrl}/integrations`, - lastModified: now, - changeFrequency: 'weekly', - priority: 0.8, - }, - { - url: `${baseUrl}/models`, - lastModified: now, - changeFrequency: 'weekly', - priority: 0.8, - }, - { - url: `${baseUrl}/partners`, - lastModified: now, - changeFrequency: 'monthly', - priority: 0.5, - }, - ] - const providerPages: MetadataRoute.Sitemap = MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => ({ - url: `${baseUrl}${provider.href}`, - lastModified: new Date( - Math.max(...provider.models.map((model) => new Date(model.pricing.updatedAt).getTime())) - ), - changeFrequency: 'weekly', - priority: 0.7, - })) - const modelPages: MetadataRoute.Sitemap = ALL_CATALOG_MODELS.map((model) => ({ - url: `${baseUrl}${model.href}`, - lastModified: new Date(model.pricing.updatedAt), - changeFrequency: 'monthly', - priority: 0.6, - })) + const latestPostDate = + posts.length > 0 + ? new Date(Math.max(...posts.map((p) => new Date(p.updated ?? p.date).getTime()))) + : undefined + + const latestModelDate = new Date( + Math.max( + ...MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) => + provider.models.map((model) => new Date(model.pricing.updatedAt).getTime()) + ) + ) + ) const staticPages: MetadataRoute.Sitemap = [ { url: baseUrl, - lastModified: now, - changeFrequency: 'daily', - priority: 1.0, + lastModified: new Date(), }, { url: `${baseUrl}/blog`, - lastModified: now, - changeFrequency: 'daily', - priority: 0.8, + lastModified: latestPostDate, }, { url: `${baseUrl}/blog/tags`, - lastModified: now, + lastModified: latestPostDate, }, { url: `${baseUrl}/changelog`, - lastModified: now, + lastModified: latestPostDate, + }, + { + url: `${baseUrl}/integrations`, + lastModified: latestModelDate, + }, + { + url: `${baseUrl}/models`, + lastModified: latestModelDate, + }, + { + url: `${baseUrl}/partners`, }, { url: `${baseUrl}/terms`, @@ -80,20 +60,56 @@ export default async function sitemap(): Promise { }, ] - const posts = await getAllPostMeta() const blogPages: MetadataRoute.Sitemap = posts.map((p) => ({ url: p.canonical, lastModified: new Date(p.updated ?? p.date), - changeFrequency: 'weekly', - priority: 0.7, })) + const authorsMap = new Map() + for (const p of posts) { + for (const author of p.authors ?? [p.author]) { + const postDate = new Date(p.updated ?? p.date) + const existing = authorsMap.get(author.id) + if (!existing || postDate > existing) { + authorsMap.set(author.id, postDate) + } + } + } + const authorPages: MetadataRoute.Sitemap = [...authorsMap.entries()].map(([id, date]) => ({ + url: `${baseUrl}/blog/authors/${id}`, + lastModified: date, + })) + + const integrationPages: MetadataRoute.Sitemap = integrations.map((integration) => ({ + url: `${baseUrl}/integrations/${integration.slug}`, + })) + + const providerPages: MetadataRoute.Sitemap = MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => ({ + url: `${baseUrl}${provider.href}`, + lastModified: new Date( + Math.max(...provider.models.map((model) => new Date(model.pricing.updatedAt).getTime())) + ), + })) + + const modelEntries: MetadataRoute.Sitemap = ALL_CATALOG_MODELS.map((model) => ({ + url: `${baseUrl}${model.href}`, + lastModified: new Date(model.pricing.updatedAt), + })) + + const academyPages: MetadataRoute.Sitemap = [ + { url: `${baseUrl}/academy` }, + ...COURSES.map((course) => ({ + url: `${baseUrl}/academy/${course.slug}`, + })), + ] + return [ ...staticPages, - ...modelHubPages, + ...blogPages, + ...authorPages, ...integrationPages, ...providerPages, - ...modelPages, - ...blogPages, + ...modelEntries, + ...academyPages, ] } From ddb412e730cb29dc7a67e8e69fb7074efa79c481 Mon Sep 17 00:00:00 2001 From: Emir Karabeg Date: Tue, 14 Apr 2026 19:56:30 -0700 Subject: [PATCH 2/5] fix(seo): remove homepage new Date(), guard latestModelDate empty array --- apps/sim/app/sitemap.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/apps/sim/app/sitemap.ts b/apps/sim/app/sitemap.ts index 42fccdd0f1..aa4744c305 100644 --- a/apps/sim/app/sitemap.ts +++ b/apps/sim/app/sitemap.ts @@ -14,18 +14,14 @@ export default async function sitemap(): Promise { ? new Date(Math.max(...posts.map((p) => new Date(p.updated ?? p.date).getTime()))) : undefined - const latestModelDate = new Date( - Math.max( - ...MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) => - provider.models.map((model) => new Date(model.pricing.updatedAt).getTime()) - ) - ) + const modelTimes = MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) => + provider.models.map((model) => new Date(model.pricing.updatedAt).getTime()) ) + const latestModelDate = modelTimes.length > 0 ? new Date(Math.max(...modelTimes)) : undefined const staticPages: MetadataRoute.Sitemap = [ { url: baseUrl, - lastModified: new Date(), }, { url: `${baseUrl}/blog`, From 281375180fbe9bda4843e1deda64ab3c871ddd3f Mon Sep 17 00:00:00 2001 From: Emir Karabeg Date: Wed, 15 Apr 2026 11:29:06 -0700 Subject: [PATCH 3/5] improvement(seo): consolidate DOCS_BASE_URL, optimize core web vitals Extract hardcoded https://docs.sim.ai into shared DOCS_BASE_URL constant in lib/urls.ts and replace all 20+ instances across layouts, metadata, structured data, LLM manifest, sitemap, and robots files. Remove OneDollarStats analytics script and tighten CSP for improved core web vitals. --- apps/docs/app/[lang]/[[...slug]]/page.tsx | 3 ++- apps/docs/app/[lang]/layout.tsx | 9 +++---- apps/docs/app/layout.tsx | 25 +++++++++--------- apps/docs/app/llms.txt/route.ts | 3 ++- apps/docs/app/robots.txt/route.ts | 2 +- apps/docs/app/sitemap.ts | 3 +-- apps/docs/components/structured-data.tsx | 4 ++- apps/docs/lib/urls.ts | 1 + apps/sim/app/layout.tsx | 6 ----- .../components/analytics/onedollarstats.tsx | 26 ------------------- apps/sim/lib/core/config/env.ts | 1 - apps/sim/lib/core/security/csp.ts | 3 --- apps/sim/package.json | 1 - 13 files changed, 27 insertions(+), 60 deletions(-) create mode 100644 apps/docs/lib/urls.ts delete mode 100644 apps/sim/components/analytics/onedollarstats.tsx diff --git a/apps/docs/app/[lang]/[[...slug]]/page.tsx b/apps/docs/app/[lang]/[[...slug]]/page.tsx index d01cd5d359..8bf0c5fd80 100644 --- a/apps/docs/app/[lang]/[[...slug]]/page.tsx +++ b/apps/docs/app/[lang]/[[...slug]]/page.tsx @@ -17,9 +17,10 @@ import { ResponseSection } from '@/components/ui/response-section' import { i18n } from '@/lib/i18n' import { getApiSpecContent, openapi } from '@/lib/openapi' import { type PageData, source } from '@/lib/source' +import { DOCS_BASE_URL } from '@/lib/urls' const SUPPORTED_LANGUAGES: Set = new Set(i18n.languages) -const BASE_URL = 'https://docs.sim.ai' +const BASE_URL = DOCS_BASE_URL const OG_LOCALE_MAP: Record = { en: 'en_US', diff --git a/apps/docs/app/[lang]/layout.tsx b/apps/docs/app/[lang]/layout.tsx index c500f440cb..4f32bf2076 100644 --- a/apps/docs/app/[lang]/layout.tsx +++ b/apps/docs/app/[lang]/layout.tsx @@ -3,7 +3,6 @@ import { defineI18nUI } from 'fumadocs-ui/i18n' import { DocsLayout } from 'fumadocs-ui/layouts/docs' import { RootProvider } from 'fumadocs-ui/provider/next' import { Geist_Mono, Inter } from 'next/font/google' -import Script from 'next/script' import { SidebarFolder, SidebarItem, @@ -13,6 +12,7 @@ import { Navbar } from '@/components/navbar/navbar' import { SimLogoFull } from '@/components/ui/sim-logo' import { i18n } from '@/lib/i18n' import { source } from '@/lib/source' +import { DOCS_BASE_URL } from '@/lib/urls' import '../global.css' const inter = Inter({ @@ -67,14 +67,14 @@ export default async function Layout({ children, params }: LayoutProps) { name: 'Sim Documentation', description: 'Documentation for Sim — the open-source AI workspace where teams build, deploy, and manage AI agents. Connect 1,000+ integrations and every major LLM.', - url: 'https://docs.sim.ai', + url: DOCS_BASE_URL, publisher: { '@type': 'Organization', name: 'Sim', url: 'https://sim.ai', logo: { '@type': 'ImageObject', - url: 'https://docs.sim.ai/static/logo.png', + url: `${DOCS_BASE_URL}/static/logo.png`, }, }, inLanguage: lang, @@ -82,7 +82,7 @@ export default async function Layout({ children, params }: LayoutProps) { '@type': 'SearchAction', target: { '@type': 'EntryPoint', - urlTemplate: 'https://docs.sim.ai/api/search?q={search_term_string}', + urlTemplate: `${DOCS_BASE_URL}/api/search?q={search_term_string}`, }, 'query-input': 'required name=search_term_string', }, @@ -101,7 +101,6 @@ export default async function Layout({ children, params }: LayoutProps) { /> -