Skip to content

Commit bf4af51

Browse files
Peter Bengtsson and rsese authored
Periodically validate docs-urls.json in github/github (#49220)
Co-authored-by: Robert Sese <734194+rsese@users.noreply.github.com>
1 parent 14ba0e9 commit bf4af51

11 files changed

Lines changed: 750 additions & 4 deletions

File tree

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
name: Validate github/github docs URLs

# **What it does**: Checks the URLs in docs-urls.json in github/github
# **Why we have it**: To ensure the values in docs-urls.json are perfect.
# **Who does it impact**: Docs content.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST
  pull_request:

permissions:
  contents: read
  issues: write
  pull-requests: write

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate_github_github_docs_urls:
    name: Validate github/github docs URLs
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-20.04-xl
    steps:
      - name: Check out repo's default branch
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - uses: ./.github/actions/node-npm-setup

      # Second checkout: github/github ends up in ./github so the
      # validation script can read github/config/docs-urls.json.
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
          repository: github/github
          ref: master
          path: github

      - name: Run validation
        run: |
          # This will generate a .json file which we can use to
          # do other things in other steps.
          npm run validate-github-github-docs-urls -- validate \
            --output checks.json \
            github/config/docs-urls.json

      - name: Update config/docs-urls.json in github/github (possibly)
        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
        run: |
          npm run validate-github-github-docs-urls -- generate-new-json checks.json github/config/docs-urls.json

          cd github
          git status
          git diff
          changes=$(git diff --name-only | wc -l)
          if [[ $changes -eq 0 ]]; then
            echo "There are no changes to commit after running generate-new-json. Exiting this step"
            exit 0
          fi

          # `git commit` refuses to run without an author identity, and a
          # fresh actions/checkout clone does not configure one.
          # NOTE(review): swap in the docs bot's name/email if the commits
          # should be attributed to it instead.
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

          current_timestamp=$(date '+%Y-%m-%d-%H%M%S')
          branch_name="update-docs-urls-$current_timestamp"
          git checkout -b "$branch_name"
          current_daystamp=$(date '+%Y-%m-%d')
          git commit -a -m "Update Docs URLs from automation ($current_daystamp)"
          git push origin "$branch_name"

          # XXX TODO
          # Perhaps post an issue somewhere, about the fact that this
          # branch has been created and now needs to be turned into a PR
          # that some human can take responsibility for.

      - name: Clean up old branches in github/github
        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
        run: |
          npm run validate-github-github-docs-urls -- clean-up-old-branches --prefix update-docs-urls

          echo "To see them all, go to:"
          echo "https://github.com/github/github/branches/all?query=update-docs-urls-"

      # If a PR comes along to github/docs-internal that causes some
      # URLs in docs-urls.json (in github/github) to now fail, then
      # we'll want to make the PR author+reviewer aware of this.
      # For example, you moved a page without setting up a redirect.
      # Or you edited a heading that now breaks a URL with fragment.
      # In the latter case, you might want to update the URL in docs-urls.json
      # after this PR has landed, or consider using `<a name="..."></a>` as a
      # workaround for the time being.
      - name: Generate PR comment
        if: ${{ github.event_name == 'pull_request' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
          ISSUE_NUMBER: ${{ github.event.pull_request.number }}
          REPOSITORY: ${{ github.repository }}
        run: npm run validate-github-github-docs-urls -- post-pr-comment checks.json

      - uses: ./.github/actions/slack-alert
        if: ${{ failure() && github.event_name == 'schedule' }}
        with:
          slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"update-data-and-image-paths": "node src/early-access/scripts/update-data-and-image-paths.js",
7070
"update-internal-links": "node src/links/scripts/update-internal-links.js",
7171
"validate-asset-images": "node src/assets/scripts/validate-asset-images.js",
72+
"validate-github-github-docs-urls": "tsx src/links/scripts/validate-github-github-docs-urls/index.ts",
7273
"warmup-remotejson": "node src/archives/scripts/warmup-remotejson.js"
7374
},
7475
"lint-staged": {

src/frame/lib/warm-server.d.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * Shape of the object that `warmServer()` resolves to.
 *
 * NOTE(review): `Page` is neither declared nor imported in this file, so it
 * has to come from an ambient declaration elsewhere — TODO confirm, otherwise
 * it silently degrades when declaration files are not type-checked.
 */
type Site = {
  // Keyed by a primitive `string`; the boxed `String` wrapper type is not a
  // valid `Record` key type.
  pages: Record<string, Page>
  redirects: Record<string, string>
  unversionedTree: Record<string, string>
  siteTree: Record<string, string>
  pageList: Page[]
}

/**
 * Type declaration for the default export of `warm-server.js`.
 *
 * @param languages Language codes passed through to the site warm-up.
 * @returns A promise for the warmed-up site data.
 */
export default function warmServer(languages: string[]): Promise<Site>

src/frame/lib/warm-server.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,17 @@ const dog = {
1515
// For multiple-triggered Promise sharing
1616
let promisedWarmServer
1717

18-
async function warmServer() {
18+
async function warmServer(languagesOnly = []) {
1919
const startTime = Date.now()
2020

2121
if (process.env.NODE_ENV !== 'test') {
22-
console.log('Priming context information...')
22+
console.log(
23+
'Priming context information...',
24+
languagesOnly && languagesOnly.length ? `${languagesOnly.join(',')} only` : '',
25+
)
2326
}
2427

25-
const unversionedTree = await dog.loadUnversionedTree()
28+
const unversionedTree = await dog.loadUnversionedTree(languagesOnly)
2629
const siteTree = await dog.loadSiteTree(unversionedTree)
2730
const pageList = await dog.loadPages(unversionedTree)
2831
const pageMap = await dog.loadPageMap(pageList)
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import cheerio from 'cheerio'
2+
3+
import warmServer from '@/frame/lib/warm-server.js'
4+
import { liquid } from '@/content-render/index.js'
5+
import shortVersions from '@/versions/middleware/short-versions.js'
6+
import contextualize from '@/frame/middleware/context/context.js'
7+
import features from '@/versions/middleware/features.js'
8+
import findPage from '@/frame/middleware/find-page.js'
9+
import { createMinimalProcessor } from '@/content-render/unified/processor.js'
10+
import getRedirect from '@/redirects/lib/get-redirect.js'
11+
12+
/** Contents of docs-urls.json: identifier -> docs URL (starting with `/`). */
export type DocsUrls = Record<string, string>

/** The subset of a permalink object that this module reads. */
type Permalink = {
  href: string
  languageCode: string
}

/** The subset of a page object that this module reads. */
type Page = {
  permalinks: Permalink[]
  relativePath: string
  rawIntro: string
  rawPermissions?: string
  markdown: string
}

/** Pages keyed by href. */
type PageMap = Record<string, Page>

/** Redirects keyed by the "from" URL; the value is the destination. */
type Redirects = Record<string, string>

/** Outcome of validating one entry of docs-urls.json. */
export type Check = {
  identifier: string
  url: string
  pageURL: string
  found: boolean
  fragment: string | undefined
  fragmentFound?: boolean
  fragmentCandidates?: string[]
  // Set when the URL led to a redirect: the URL of the page it
  // redirects to (starting with /en/...)
  redirectPageURL?: string
  // Set when the URL led to a redirect: what the new URL should be
  // (for example /the/new/pathname#my-fragment)
  redirect?: string
}
48+
49+
/**
 * Validate every URL of a docs-urls.json mapping against the English pages
 * of the warmed-up site.
 *
 * For each entry one `Check` is produced saying whether the page exists
 * (directly or via a redirect) and, when `checkFragments` is enabled and the
 * URL carries a fragment, whether the rendered page contains an element with
 * that `id` or an `<a name="...">` anchor.
 *
 * @param docsUrls Mapping of identifier -> URL. Every URL must start with `/`.
 * @param options.checkFragments Render pages and verify `#fragment` targets.
 * @returns One `Check` per entry, in `Object.entries` order.
 * @throws If a URL doesn't start with `/`, or if a redirect resolves to a
 *   page that doesn't exist.
 */
export async function validateDocsUrl(
  docsUrls: DocsUrls,
  { checkFragments = false } = {},
): Promise<Check[]> {
  const site = await warmServer(['en'])
  const pages: PageMap = site.pages
  const redirects: Redirects = site.redirects

  const checks: Check[] = []
  for (const [identifier, url] of Object.entries(docsUrls)) {
    if (!url.startsWith('/')) {
      throw new Error(`URL doesn't start with '/': ${url} (identifier: ${identifier})`)
    }

    // Split the fragment off *before* dropping the query string. Doing it
    // the other way around loses the fragment of `/path?query#fragment`
    // and turns `/#fragment` into `/en/` instead of the homepage `/en`.
    const hashIndex = url.indexOf('#')
    const fragment = hashIndex === -1 ? undefined : url.slice(hashIndex + 1)
    const pathname = (hashIndex === -1 ? url : url.slice(0, hashIndex)).split('?')[0]
    // If the url is just '/' we want to check the homepage,
    // which is `/en`, not `/en/`.
    const pageURL = `/en${pathname === '/' ? '' : pathname}`

    const page: Page | undefined = pages[pageURL]
    const check: Check = {
      identifier,
      url,
      pageURL,
      fragment,
      found: !!page,
    }

    // The page whose content the fragment should be looked up in: the page
    // itself or, failing that, the page it redirects to.
    let targetPage: Page | undefined = page
    if (!page) {
      const redirect = getRedirect(pageURL, {
        userLanguage: 'en',
        redirects,
        pages,
      })
      if (redirect) {
        targetPage = pages[redirect]
        if (!targetPage) {
          throw new Error(`The redirected page doesn't exist: ${redirect}`)
        }
        check.found = true
        check.redirectPageURL = redirect
        check.redirect = stripLanguagePrefix(redirect)
        if (fragment) {
          check.redirect += `#${fragment}`
        }
      }
    }

    // Without a page there is nothing to render; `found: false` already
    // reports the problem, so the fragment fields are left unset rather
    // than crashing on `undefined.permalinks`.
    if (checkFragments && fragment && targetPage) {
      const permalink = targetPage.permalinks[0]
      const html = await renderInnerHTML(targetPage, permalink)
      const $ = cheerio.load(html)
      // Compare attribute values directly instead of interpolating the
      // fragment into a CSS selector, so characters such as `.`, `:` or `"`
      // in a fragment can't break or change the meaning of the selector.
      const idMatches = $('[id]').filter((_, el) => $(el).attr('id') === fragment)
      const anchorMatches = $('a[name]').filter((_, el) => $(el).attr('name') === fragment)
      check.fragmentFound = idMatches.length > 0 || anchorMatches.length > 0
      if (!check.fragmentFound) {
        // Offer the page's h2/h3 heading ids as suggestions.
        const fragmentCandidates: string[] = []
        $('h2[id], h3[id]').each((_, el) => {
          const id = $(el).attr('id')
          if (id) {
            fragmentCandidates.push(id)
          }
        })
        check.fragmentCandidates = fragmentCandidates
      }
    }
    checks.push(check)
  }
  return checks
}
113+
114+
/**
 * Render a page's markdown to an HTML string.
 *
 * A minimal fake request is pushed through the contextualize, shortVersions,
 * findPage and features middlewares (in that order) to build up
 * `req.context`; the page's markdown is then run through Liquid and the
 * minimal unified processor using that context.
 *
 * @param page The page whose `markdown` gets rendered.
 * @param permalink Supplies the request path and language code.
 * @returns The processor output as a string.
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  const { href, languageCode } = permalink

  const fakeRequest = {
    path: href,
    language: languageCode,
    pagePath: href,
    cookies: {},
    // The contextualize() middleware will create a new one.
    // Here it just exists for the sake of TypeScript.
    context: {},
  }
  const fakeResponse = {}
  const noop = () => {}

  await contextualize(fakeRequest, fakeResponse, noop)
  await shortVersions(fakeRequest, fakeResponse, noop)
  await findPage(fakeRequest, fakeResponse, noop)
  await features(fakeRequest, fakeResponse, noop)

  const renderedMarkdown = await liquid.parseAndRender(page.markdown, fakeRequest.context)
  const output = await createMinimalProcessor(fakeRequest.context).process(renderedMarkdown)
  return output.toString()
}
138+
139+
/**
 * Remove the leading `/en` language prefix from a URL.
 *
 * `/en/some/page` becomes `/some/page` and the bare homepage `/en` becomes
 * `/`. URLs without that prefix (including ones that merely start with the
 * letters "en", such as `/enterprise`) are returned unchanged.
 */
function stripLanguagePrefix(url: string): string {
  // The homepage has no trailing slash, so the regex below can't match it.
  if (url === '/en') return '/'
  return url.replace(/^\/en\//, '/')
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import { Octokit } from '@octokit/rest'
2+
import { retry } from '@octokit/plugin-retry'
3+
4+
// Age threshold (in days) used when `options.minDays` is not set.
const DEFAULT_MIN_DAYS = 30

type Options = {
  // Only branches whose name starts with this prefix are considered.
  prefix: string
  // Branches last updated more than this many days ago get deleted.
  minDays: number
  // Target repository in "owner/repo" form.
  repository: string
}

/**
 * Delete old branches, by name prefix, from a GitHub repository.
 *
 * Every branch whose name starts with `options.prefix` is looked up, and the
 * ones whose tip commit has an author date more than `minDays` days in the
 * past are deleted. Branches without an author date are skipped.
 *
 * Requires the `GITHUB_TOKEN` environment variable.
 *
 * @throws If `GITHUB_TOKEN` is unset, or if `prefix`, `minDays` or
 *   `repository` are unusable.
 */
export async function cleanUpOldBranches(options: Options) {
  // Goes through a string because the value may arrive as one (e.g. from
  // the command line) despite the declared type.
  const minDays = parseInt(`${options.minDays || DEFAULT_MIN_DAYS}`, 10)
  if (Number.isNaN(minDays) || minDays < 0) {
    // NaN would make every `ageDays > minDays` comparison false and
    // silently turn the clean-up into a no-op.
    throw new Error(`Invalid minDays (expected a non-negative number): ${options.minDays}`)
  }

  if (!process.env.GITHUB_TOKEN) {
    throw new Error('You must set the GITHUB_TOKEN environment variable.')
  }

  if (!options.prefix) {
    // An empty prefix would match `heads/`, i.e. every branch in the
    // repository, and delete all of the old ones.
    throw new Error('You must specify a non-empty branch name prefix.')
  }

  const [owner, repo, ...extra] = `${options.repository || ''}`.split('/')
  if (!owner || !repo || extra.length > 0) {
    throw new Error(`Invalid repository (expected "owner/repo"): ${options.repository}`)
  }

  const octokit = retryingOctokit(process.env.GITHUB_TOKEN)

  // Paginate so that branches beyond the first page of results are
  // considered too.
  const refs = await octokit.paginate('GET /repos/{owner}/{repo}/git/matching-refs/{ref}', {
    owner,
    repo,
    ref: `heads/${options.prefix}`,
    per_page: 100,
  })

  for (const ref of refs) {
    const branchName = ref.ref.replace('refs/heads/', '')
    const { data: branch } = await octokit.request('GET /repos/{owner}/{repo}/branches/{branch}', {
      owner,
      repo,
      branch: branchName,
    })
    const { name, commit } = branch
    const authorDate = commit.commit.author?.date
    if (!authorDate) continue

    const lastUpdated = new Date(authorDate)
    const ageDays = (Date.now() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24)
    console.log(
      `Branch ${name} was last updated ${ageDays.toFixed(1)} days ago (${lastUpdated.toISOString()})`,
    )
    if (ageDays > minDays) {
      console.log(`Deleting branch ${name} !!`)
      await octokit.request('DELETE /repos/{owner}/{repo}/git/refs/{ref}', {
        owner,
        repo,
        ref: `heads/${name}`,
      })
    } else {
      console.log(`Branch ${name} is not old enough (min days: ${minDays})`)
    }
  }
}
56+
57+
/**
 * Build an Octokit client, extended with the retry plugin, that
 * authenticates with the given token.
 */
function retryingOctokit(token: string) {
  const OctokitWithRetry = Octokit.plugin(retry)
  const auth = `token ${token}`
  return new OctokitWithRetry({ auth })
}

0 commit comments

Comments
 (0)