1- import path from 'path'
2-
31import type { Response , NextFunction } from 'express'
4- import slash from 'slash'
52import got from 'got'
63
74import statsd from '@/observability/lib/statsd.js'
@@ -25,18 +22,16 @@ import getRedirect, { splitPathByLanguage } from '@/redirects/lib/get-redirect.j
2522import getRemoteJSON from '@/frame/lib/get-remote-json.js'
2623import { ExtendedRequest } from '@/types'
2724
28- const REMOTE_ENTERPRISE_STORAGE_URL = 'https://githubdocs.azureedge.net/enterprise'
29-
30- function splitByLanguage ( uri : string ) {
31- let language = null
32- let withoutLanguage = uri
33- const match = uri . match ( languagePrefixPathRegex )
34- if ( match ) {
35- language = match [ 1 ]
36- withoutLanguage = uri . replace ( languagePrefixPathRegex , '/' )
37- }
38- return [ language , withoutLanguage ]
39- }
// Root of the old public Azure Blob Storage endpoint. Every legacy asset URL
// that gets rewritten in archived pages starts with this prefix.
const OLD_PUBLIC_AZURE_BLOB_URL = 'https://githubdocs.azureedge.net'
// The `enterprise` container of the old Azure Blob Storage.
const OLD_AZURE_BLOB_ENTERPRISE_DIR = OLD_PUBLIC_AZURE_BLOB_URL + '/enterprise'
// The `enterprise` root directory inside the old Azure Blob Storage
// `github-images` container.
const OLD_GITHUB_IMAGES_ENTERPRISE_DIR = OLD_PUBLIC_AZURE_BLOB_URL + '/github-images/enterprise'
// The `developer-site` container of the old Azure Blob Storage.
const OLD_DEVELOPER_SITE_CONTAINER = OLD_PUBLIC_AZURE_BLOB_URL + '/developer-site'
// Each archived enterprise version lives in its own repo whose name ends with
// the release number; append the release to this prefix to get its URL.
// E.g. https://github.github.com/docs-ghes-2.10
const ENTERPRISE_GH_PAGES_URL_PREFIX = 'https://github.github.com/docs-ghes-'
4035
4136type ArchivedRedirects = {
4237 [ url : string ] : string | null
@@ -93,7 +88,8 @@ const retryConfiguration = { limit: 3 }
9388const timeoutConfiguration = { response : 1500 }
9489
9590// This module handles requests for deprecated GitHub Enterprise versions
96- // by routing them to static content in help-docs-archived-enterprise-versions
91+ // by routing them to static content in
92+ // one of the docs-ghes-<release number> repos.
9793
9894export default async function archivedEnterpriseVersions (
9995 req : ExtendedRequest ,
@@ -108,6 +104,7 @@ export default async function archivedEnterpriseVersions(
108104
109105 const redirectCode = pathLanguagePrefixed ( req . path ) ? 301 : 302
110106
107+ // Redirects for releases 3.0+
111108 if ( deprecatedWithFunctionalRedirects . includes ( requestedVersion ) ) {
112109 const redirectTo = getRedirect ( req . path , req . context )
113110 if ( redirectTo ) {
@@ -138,8 +135,7 @@ export default async function archivedEnterpriseVersions(
138135 return res . redirect ( redirectCode , `/${ language } ${ newRedirectTo } ` )
139136 }
140137 }
141- // redirect language-prefixed URLs like /en/enterprise/2.10 -> /enterprise/2.10
142- // (this only applies to versions <2.13)
138+ // For releases lower than 2.13, redirect language-prefixed URLs like /en/enterprise/2.10 -> /enterprise/2.10
143139 if (
144140 req . path . startsWith ( '/en/' ) &&
145141 versionSatisfiesRange ( requestedVersion , `<${ firstVersionDeprecatedOnNewSite } ` )
@@ -148,8 +144,7 @@ export default async function archivedEnterpriseVersions(
148144 return res . redirect ( redirectCode , req . baseUrl + req . path . replace ( / ^ \/ e n / , '' ) )
149145 }
150146
151- // find redirects for versions between 2.13 and 2.17
152- // starting with 2.18, we updated the archival script to create a redirects.json file
147+ // Redirects for releases 2.13 - 2.17
153148 if (
154149 versionSatisfiesRange ( requestedVersion , `>=${ firstVersionDeprecatedOnNewSite } ` ) &&
155150 versionSatisfiesRange ( requestedVersion , `<=${ lastVersionWithoutArchivedRedirectsFile } ` )
@@ -173,7 +168,8 @@ export default async function archivedEnterpriseVersions(
173168 return res . redirect ( redirectCode , redirect )
174169 }
175170 }
176-
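171+ // Redirects for releases 2.18 and higher that are not covered by the
172+ // functional redirects above (i.e. releases below 3.0). Starting with 2.18,
172+ // we updated the archival script to create a redirects.json file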
177173 if (
178174 versionSatisfiesRange ( requestedVersion , `>${ lastVersionWithoutArchivedRedirectsFile } ` ) &&
179175 ! deprecatedWithFunctionalRedirects . includes ( requestedVersion )
@@ -195,19 +191,25 @@ export default async function archivedEnterpriseVersions(
195191 return res . redirect ( redirectCode , redirectJson [ req . path ] )
196192 }
197193 }
198-
199- const statsdTags = [ `version:${ requestedVersion } ` ]
194+ // Retrieve the page from the archived repo
200195 const doGet = ( ) =>
201196 got ( getProxyPath ( req . path , requestedVersion ) , {
202197 throwHttpErrors : false ,
203198 retry : retryConfiguration ,
204199 timeout : timeoutConfiguration ,
205200 } )
201+
202+ const statsdTags = [ `version:${ requestedVersion } ` ]
206203 const r = await statsd . asyncTimer ( doGet , 'archive_enterprise_proxy' , [
207204 ...statsdTags ,
208205 `path:${ req . path } ` ,
209206 ] ) ( )
207+
210208 if ( r . statusCode === 200 ) {
209+ const [ , withoutLanguagePath ] = splitByLanguage ( req . path )
210+ const isDeveloperPage = withoutLanguagePath ?. startsWith (
211+ `/enterprise/${ requestedVersion } /developer` ,
212+ )
211213 res . set ( 'x-robots-tag' , 'noindex' )
212214
213215 // make stubbed redirect files (which exist in versions <2.13) redirect with a 301
@@ -221,11 +223,74 @@ export default async function archivedEnterpriseVersions(
221223
222224 cacheAggressively ( res )
223225
226+ // Releases 3.2 and higher contain image asset paths with the
227+ // old Azure Blob Storage URL. These need to be rewritten to
228+ // the new archived enterprise repo URL.
229+ if ( versionSatisfiesRange ( requestedVersion , `>=${ firstReleaseStoredInBlobStorage } ` ) ) {
230+ r . body = r . body
231+ . replaceAll (
232+ `${ OLD_AZURE_BLOB_ENTERPRISE_DIR } /${ requestedVersion } /assets/cb-` ,
233+ `${ ENTERPRISE_GH_PAGES_URL_PREFIX } ${ requestedVersion } /assets/cb-` ,
234+ )
235+ . replaceAll (
236+ `${ OLD_AZURE_BLOB_ENTERPRISE_DIR } /${ requestedVersion } /` ,
237+ `${ req . protocol } ://${ req . get ( 'host' ) } /enterprise-server@${ requestedVersion } /` ,
238+ )
239+ }
240+
241+ // Releases 3.1 and lower were previously hosted in the
242+ // help-docs-archived-enterprise-versions repo. Only the images
243+ // were stored in the old Azure Blob Storage `github-images` container.
244+ // The image paths all need to be updated to reference the images in the
245+ // new archived enterprise repo's root assets directory.
246+ if ( versionSatisfiesRange ( requestedVersion , `<${ firstReleaseStoredInBlobStorage } ` ) ) {
247+ r . body = r . body . replaceAll (
248+ `${ OLD_GITHUB_IMAGES_ENTERPRISE_DIR } /${ requestedVersion } ` ,
249+ `${ ENTERPRISE_GH_PAGES_URL_PREFIX } ${ requestedVersion } ` ,
250+ )
251+ if ( versionSatisfiesRange ( requestedVersion , '<=2.18' ) && isDeveloperPage ) {
252+ r . body = r . body . replaceAll (
253+ `${ OLD_DEVELOPER_SITE_CONTAINER } /${ requestedVersion } ` ,
254+ `${ ENTERPRISE_GH_PAGES_URL_PREFIX } ${ requestedVersion } /developer` ,
255+ )
256+ // Update all hrefs to add /developer to the path
257+ r . body = r . body . replaceAll (
258+ `="/enterprise/${ requestedVersion } ` ,
259+ `="/enterprise/${ requestedVersion } /developer` ,
260+ )
261+ // The changelog is the only thing remaining on developer.github.com
262+ r . body = r . body . replaceAll ( 'href="/changes' , 'href="https://developer.github.com/changes' )
263+ }
264+ }
265+
266+ // In all releases, some assets were incorrectly scraped and contain
267+ // deep relative paths. For example, releases 3.4+ use the webp format
268+ // for images. The URLs for those images were never rewritten to pull
269+ // from the Azure Blob Storage container. This may be due to not
270+ // updating our scraping tool to handle the new image types. There
271+ // are additional images in older versions that also have a relative path.
272+ // We want to update the URLs in the format
273+ // "../../../../../../assets/" to prefix the assets directory with the
274+ // new archived enterprise repo URL.
275+ r . body = r . body . replaceAll (
276+ / = " ( \. \. \/ ) * a s s e t s / g,
277+ `="${ ENTERPRISE_GH_PAGES_URL_PREFIX } ${ requestedVersion } /assets` ,
278+ )
279+
280+ // Fix broken hrefs on the 2.16 landing page
281+ if ( requestedVersion === '2.16' && req . path === '/en/enterprise/2.16' ) {
282+ r . body = r . body . replaceAll ( 'ref="/en/enterprise' , 'ref="/en/enterprise/2.16' )
283+ }
284+
285+ // Remove the search results container from the page, which removes a white
286+ // box that prevents clicking on page links
287+ r . body = r . body . replaceAll ( '<div id="search-results-container"></div>' , '' )
288+
224289 return res . send ( r . body )
225290 }
226-
227- // from 2.13 to 2.17, we lost access to frontmatter redirects during the archival process
228- // this workaround finds potentially relevant frontmatter redirects in currently supported pages
291+ // In releases 2.13 - 2.17, we lost access to frontmatter redirects
292+ // during the archival process. This workaround finds potentially
293+ // relevant frontmatter redirects in currently supported pages
229294 if (
230295 versionSatisfiesRange ( requestedVersion , `>=${ firstVersionDeprecatedOnNewSite } ` ) &&
231296 versionSatisfiesRange ( requestedVersion , `<=${ lastVersionWithoutArchivedRedirectsFile } ` )
@@ -244,18 +309,35 @@ export default async function archivedEnterpriseVersions(
244309 return next ( )
245310}
246311
247- // paths are slightly different depending on the version
248- // for >=2.13: /2.13/en/enterprise/2.13/user/articles/viewing-contributions-on-your-profile
249- // for <2.13: /2.12/user/articles/viewing-contributions-on-your-profile
/**
 * Builds the URL of the archived copy of a page inside the
 * docs-ghes-<release number> repo for the requested release.
 *
 * The layout of the archived repos differs by release, so the request path is
 * mapped differently depending on the version and on whether the page is a
 * developer page (a path under `/enterprise/<release>/developer`).
 *
 * @param reqPath - Path of the incoming request (or of a file such as
 *   `redirects.json` to fetch from the archive).
 * @param requestedVersion - The deprecated Enterprise release number, e.g. `2.13`.
 * @returns The absolute URL to fetch from the archived repo.
 */
function getProxyPath(reqPath: string, requestedVersion: string): string {
  const archiveRoot = ENTERPRISE_GH_PAGES_URL_PREFIX + requestedVersion
  const enterprisePath = `/enterprise/${requestedVersion}`
  // Some archives are rooted at the release itself, so the
  // `/enterprise/<release>` segment has to be dropped from the request path.
  const withoutEnterprisePath = () => archiveRoot + reqPath.replace(enterprisePath, '')

  const [, withoutLanguagePath] = splitByLanguage(reqPath)
  const isDeveloperPage = withoutLanguagePath?.startsWith(`${enterprisePath}/developer`)

  // Developer pages are stored under the `/developer` directory at the root
  // of the archived repo, regardless of the release.
  if (isDeveloperPage) {
    return withoutEnterprisePath()
  }

  // Releases 2.18 and higher
  if (versionSatisfiesRange(requestedVersion, `>${lastVersionWithoutArchivedRedirectsFile}`)) {
    const newReqPath = reqPath.includes('redirects.json') ? `/${reqPath}` : reqPath + '/index.html'
    return archiveRoot + newReqPath
  }

  // Releases 2.13 - 2.17
  // redirects.json files don't exist for these versions
  if (versionSatisfiesRange(requestedVersion, `>=${firstVersionDeprecatedOnNewSite}`)) {
    return archiveRoot + reqPath + '/index.html'
  }

  // Releases 2.12 and lower
  return withoutEnterprisePath()
}
260342
261343// Module-level global cache object.
@@ -276,7 +358,7 @@ function getFallbackRedirect(req: ExtendedRequest) {
276358 //
277359 // The keys are valid URLs that it can redirect to. I.e. these are
278360 // URLs that we definitely know are valid and will be found
279- // in https://github.com/github/help- docs-archived-enterprise-versions
361+ // in one of the docs-ghes-<release number> repos.
280362 // The array values are possible URLs we deem acceptable redirect
281363 // sources.
282364 // But to avoid an unnecessary, O(n), loop every time, we turn this
@@ -311,3 +393,14 @@ function getFallbackRedirect(req: ExtendedRequest) {
311393 return `/${ language } ${ fallback } `
312394 }
313395}
396+
/**
 * Splits a URI into its language prefix and the remainder of the path.
 *
 * @param uri - The path to inspect, e.g. `/en/enterprise/2.10`.
 * @returns A two-item array `[language, withoutLanguage]`. When the URI does
 *   not match `languagePrefixPathRegex`, `language` is `null` and the URI is
 *   returned unchanged; otherwise `language` is the first capture group and
 *   the matched prefix is replaced by `/`.
 */
function splitByLanguage(uri: string) {
  const prefixMatch = uri.match(languagePrefixPathRegex)
  if (!prefixMatch) {
    return [null, uri]
  }
  return [prefixMatch[1], uri.replace(languagePrefixPathRegex, '/')]
}
406+ }
0 commit comments