|
| 1 | +import cheerio from 'cheerio' |
| 2 | + |
| 3 | +import warmServer from '@/frame/lib/warm-server.js' |
| 4 | +import { liquid } from '@/content-render/index.js' |
| 5 | +import shortVersions from '@/versions/middleware/short-versions.js' |
| 6 | +import contextualize from '@/frame/middleware/context/context.js' |
| 7 | +import features from '@/versions/middleware/features.js' |
| 8 | +import findPage from '@/frame/middleware/find-page.js' |
| 9 | +import { createMinimalProcessor } from '@/content-render/unified/processor.js' |
| 10 | +import getRedirect from '@/redirects/lib/get-redirect.js' |
| 11 | + |
/**
 * Map from an arbitrary caller-chosen identifier to the docs URL
 * (starting with `/`) that should be validated.
 */
export type DocsUrls = Record<string, string>
| 15 | + |
// One addressable location of a page.
type Permalink = {
  href: string
  languageCode: string
}

// The subset of a page object that this module reads.
type Page = {
  permalinks: Permalink[]
  relativePath: string
  rawIntro: string
  rawPermissions?: string
  markdown: string
}

// Pages keyed by href.
type PageMap = Record<string, Page>

// Redirect table: source URL -> destination URL.
type Redirects = Record<string, string>
| 33 | + |
/** Outcome of validating a single entry of a `DocsUrls` map. */
export type Check = {
  /** Key of the entry in the input map. */
  identifier: string
  /** The URL exactly as it was supplied. */
  url: string
  /** The `/en`-prefixed page URL that was looked up. */
  pageURL: string
  /** Whether a page was found, either directly or through a redirect. */
  found: boolean
  /** The part after `#` in the URL, if there was one. */
  fragment: string | undefined
  /** Only set when fragments are checked: was the fragment found in the page? */
  fragmentFound?: boolean
  /** When the fragment was not found: heading ids that do exist in the page. */
  fragmentCandidates?: string[]
  /** If the URL led to a redirect, this is its URL (starting with /en/...). */
  redirectPageURL?: string
  /**
   * If the URL led to a redirect, this is what the new URL should be
   * (for example /the/new/pathname#my-fragment).
   */
  redirect?: string
}
| 48 | + |
/**
 * Validate a collection of docs URLs against the English site.
 *
 * For each entry the URL is mapped to its `/en`-prefixed page URL and looked
 * up in the warmed-up page map. If no page exists under that URL, the
 * redirects are consulted and, when one applies, the check records both the
 * redirect target and the URL the caller should switch to.
 *
 * @param docsUrls - Map of identifier to URL. Every URL must start with `/`.
 * @param options.checkFragments - When true, URLs with a `#fragment` get the
 *   resolved page rendered and the fragment is searched for as an element
 *   `id` or an `<a name>`.
 * @returns One `Check` per entry, in the iteration order of `docsUrls`.
 * @throws Error if a URL does not start with `/`, or if a redirect points at
 *   a page that does not exist.
 */
export async function validateDocsUrl(
  docsUrls: DocsUrls,
  { checkFragments = false } = {},
): Promise<Check[]> {
  const site = await warmServer(['en'])
  const pages: PageMap = site.pages
  const redirects: Redirects = site.redirects

  const checks: Check[] = []
  for (const [identifier, url] of Object.entries(docsUrls)) {
    if (!url.startsWith('/')) {
      throw new Error(`URL doesn't start with '/': ${url} (identifier: ${identifier})`)
    }

    // Drop only the query string (from the first '?' up to a following '#'),
    // so that a fragment placed after the query, as in '/foo?x=1#bar',
    // is not thrown away together with it.
    const urlParts = url.replace(/\?[^#]*/, '').split('#')
    const pathname = urlParts[0]
    const fragment: string | undefined = urlParts[1]

    // If the path is just '/' we want to check the homepage,
    // which is `/en`, not `/en/`. The comparison is done on the path alone
    // so that '/#fragment' resolves to the homepage too.
    const pageURL = `/en${pathname === '/' ? '' : pathname}`

    const page: Page | undefined = pages[pageURL]
    const check: Check = {
      identifier,
      url,
      pageURL,
      fragment,
      found: !!page,
    }

    let redirectedPage: Page | undefined
    if (!page) {
      const redirect = getRedirect(pageURL, {
        userLanguage: 'en',
        redirects,
        pages,
      })
      if (redirect) {
        redirectedPage = pages[redirect]
        if (!redirectedPage) {
          throw new Error(`The redirected page doesn't exist: ${redirect}`)
        }
        check.found = true
        check.redirectPageURL = redirect
        check.redirect = stripLanguagePrefix(redirect)
        if (fragment) {
          check.redirect += `#${fragment}`
        }
      }
    }

    // Fragments can only be checked when the URL resolved to a page. For a
    // URL that was not found at all, `fragmentFound` is left unset rather
    // than crashing on a missing page.
    const targetPage = redirectedPage ?? page
    if (checkFragments && fragment && targetPage) {
      const permalink = targetPage.permalinks[0]
      const html = await renderInnerHTML(targetPage, permalink)
      const $ = cheerio.load(html)

      // Compare attribute values directly instead of interpolating the
      // fragment into a CSS selector: fragments that start with a digit or
      // contain characters such as '.', ':' or '"' are not valid selector
      // syntax and would either mis-match or throw.
      const matchesId = $('[id]').filter((_, el) => $(el).attr('id') === fragment).length > 0
      const matchesAnchorName =
        $('a[name]').filter((_, el) => $(el).attr('name') === fragment).length > 0
      check.fragmentFound = matchesId || matchesAnchorName

      if (!check.fragmentFound) {
        // Offer the headings that do exist so the caller can suggest a fix.
        const fragmentCandidates: string[] = []
        $('h2[id], h3[id]').each((_, el) => {
          const id = $(el).attr('id')
          if (id) {
            fragmentCandidates.push(id)
          }
        })
        check.fragmentCandidates = fragmentCandidates
      }
    }
    checks.push(check)
  }
  return checks
}
| 113 | + |
/**
 * Render the Markdown of `page` to an HTML string.
 *
 * A stand-in request object is built from the permalink and passed through
 * the contextualize, shortVersions, findPage and features middleware (in
 * that order). The resulting `context` is then used both for the Liquid
 * rendering of `page.markdown` and for creating the minimal processor that
 * produces the final output.
 *
 * @param page - The page whose `markdown` is rendered.
 * @param permalink - Supplies the path and language of the stand-in request.
 * @returns The processor output converted to a string.
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  const { href, languageCode } = permalink

  // The middleware are invoked directly, so a do-nothing `next` callback
  // and an empty response object stand in for the real ones.
  const noop = () => {}
  const fakeResponse = {}
  const fakeRequest = {
    path: href,
    language: languageCode,
    pagePath: href,
    cookies: {},
    // The contextualize() middleware will create a new one.
    // Here it just exists for the sake of TypeScript.
    context: {},
  }

  await contextualize(fakeRequest, fakeResponse, noop)
  await shortVersions(fakeRequest, fakeResponse, noop)
  await findPage(fakeRequest, fakeResponse, noop)
  await features(fakeRequest, fakeResponse, noop)

  const renderedMarkdown = await liquid.parseAndRender(page.markdown, fakeRequest.context)
  const vFile = await createMinimalProcessor(fakeRequest.context).process(renderedMarkdown)
  return vFile.toString()
}
| 138 | + |
/**
 * Remove the leading English language prefix from a site URL.
 *
 * `/en/some/page` becomes `/some/page`, and the bare homepage URL `/en`
 * becomes `/`. URLs that merely start with the letters "en" (for example
 * `/enterprise/...`) are returned unchanged.
 *
 * @param url - A URL path, normally starting with `/en`.
 * @returns The URL without the language prefix.
 */
function stripLanguagePrefix(url: string): string {
  // `(?:\/|$)` requires the prefix to be a whole path segment, and also
  // covers the homepage, which is `/en` without a trailing slash.
  return url.replace(/^\/en(?:\/|$)/, '/')
}
0 commit comments