-
Notifications
You must be signed in to change notification settings - Fork 42
Validate and update links (STF-557) #375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| name: Links | ||
|
|
||
| on: | ||
| push: | ||
| pull_request: | ||
| schedule: | ||
| - cron: "0 13 * * 1" # weekly, to catch external link rot without a commit | ||
| workflow_dispatch: | ||
|
|
||
| permissions: | ||
| contents: read | ||
|
|
||
| jobs: | ||
| linkChecker: | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | ||
| with: | ||
| persist-credentials: false | ||
|
|
||
| - name: Setup mise | ||
| uses: jdx/mise-action@6d1e696aa24c1aa1bcc1adea0212707c71ab78a8 # v3.6.1 | ||
| with: | ||
| install: false | ||
|
|
||
| # Install only lychee (not the repo's full toolchain) and run the check. | ||
| - name: Check links | ||
| env: | ||
| MISE_AUTO_INSTALL: "false" | ||
| run: | | ||
| mise install lychee | ||
| mise run check-links |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| .coverage | ||
| .eggs | ||
| .idea | ||
| .lycheecache | ||
| .pyre | ||
| .tox | ||
| build | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,67 @@ | ||||||
| # Lychee link checker configuration | ||||||
| # https://lychee.cli.rs/#/usage/config | ||||||
| # | ||||||
| # Run locally with: | ||||||
| # lychee './**/*.md' './**/*.rst' './maxminddb/**/*.py' './pyproject.toml' | ||||||
|
|
||||||
| # Include URL fragments in checks | ||||||
| include_fragments = true | ||||||
|
|
||||||
| # Don't allow any redirects, so links that have moved are surfaced and can be | ||||||
| # updated to their canonical destination. | ||||||
| max_redirects = 0 | ||||||
|
|
||||||
| # Accept these HTTP status codes | ||||||
| # 100-103: Informational responses | ||||||
| # 200-299: Success responses | ||||||
| # 403: Forbidden (some sites use this for rate limiting) | ||||||
| # 429: Too Many Requests | ||||||
| # 500-599: Server errors (temporary issues shouldn't fail CI) | ||||||
| # 999: LinkedIn's custom status code | ||||||
| accept = ["100..=103", "200..=299", "403", "429", "500..=599", "999"] | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Including
Suggested change
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keeping — Claude (posted on Greg's behalf) |
||||||
|
|
||||||
| # Exclude URL patterns from checking (treated as regular expressions) | ||||||
| exclude = [ | ||||||
| # GitHub blob URLs with line-number fragments (not parseable as page anchors) | ||||||
| '^https://github\.com/[^/]+/[^/]+/blob/[0-9a-fA-F]+/.+#L\d+$', | ||||||
| # Live / auth-gated MaxMind endpoints: appear as code string literals or | ||||||
| # require login, so they can't be verified by an anonymous request. | ||||||
| '^https://geoip\.maxmind\.com', | ||||||
| '^https://geolite\.info', | ||||||
| '^https://minfraud\.maxmind\.com', | ||||||
| '^https://sandbox\.maxmind\.com', | ||||||
| '^https://updates\.maxmind\.com', | ||||||
| '^https://www\.maxmind\.com/en/accounts/', | ||||||
| '^https://www\.maxmind\.com/en/account/login', | ||||||
| # Local / placeholder URLs (e.g. the proxy example in docstrings) | ||||||
| '^file://', | ||||||
| '^https?://example\.(com|org|net)', | ||||||
| '^http://localhost', | ||||||
| '127\.0\.0\.1', | ||||||
| ] | ||||||
|
|
||||||
| # Exclude file paths from getting checked (regular expressions, matched against | ||||||
| # the path relative to the working directory). Patterns are segment-anchored | ||||||
| # with (^|/) so short names like "build" don't match unintended paths. | ||||||
| exclude_path = [ | ||||||
| '(^|/)node_modules/', | ||||||
| '(^|/)\.venv/', | ||||||
| '(^|/)venv/', | ||||||
| '(^|/)build/', | ||||||
| '(^|/)dist/', | ||||||
| '(^|/)\.eggs/', | ||||||
| '(^|/)[^/]*\.egg-info/', | ||||||
| '(^|/)docs/_build/', | ||||||
| # Vendored git submodules (upstream C library and shared test fixtures) | ||||||
| '(^|/)extension/libmaxminddb/', | ||||||
| '(^|/)tests/data/', | ||||||
| # Changelog: historical entries are preserved as-is, not rewritten | ||||||
| '(^|/)HISTORY\.rst$', | ||||||
| ] | ||||||
|
|
||||||
| # Cache results for 1 day to speed up repeated checks | ||||||
| cache = true | ||||||
| max_cache_age = "1d" | ||||||
|
|
||||||
| # Skip missing input files instead of erroring | ||||||
| skip_missing = true | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html | ||
|
|
||
| [[tools.lychee]] | ||
| version = "0.23.0" | ||
| backend = "aqua:lycheeverse/lychee" | ||
|
|
||
| [tools.lychee."platforms.linux-arm64"] | ||
| checksum = "sha256:97eb93b02a7d78a752fc33e5b0983439ccaadbf3db952b68a0a4401acd92e6e0" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-aarch64-unknown-linux-gnu.tar.gz" | ||
|
|
||
| [tools.lychee."platforms.linux-arm64-musl"] | ||
| checksum = "sha256:97eb93b02a7d78a752fc33e5b0983439ccaadbf3db952b68a0a4401acd92e6e0" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-aarch64-unknown-linux-gnu.tar.gz" | ||
|
|
||
| [tools.lychee."platforms.linux-x64"] | ||
| checksum = "sha256:5538440d2c69a45a0a09983271e5dee0c2fe7137d8035d25b2632e10a66a090a" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-x86_64-unknown-linux-musl.tar.gz" | ||
|
|
||
| [tools.lychee."platforms.linux-x64-musl"] | ||
| checksum = "sha256:5538440d2c69a45a0a09983271e5dee0c2fe7137d8035d25b2632e10a66a090a" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-x86_64-unknown-linux-musl.tar.gz" | ||
|
|
||
| [tools.lychee."platforms.macos-arm64"] | ||
| checksum = "sha256:4c8034900e11083b68ac6f6582c377ff1f704e268991999e09d717973e493e7f" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-arm64-macos.dmg" | ||
|
|
||
| [tools.lychee."platforms.windows-x64"] | ||
| checksum = "sha256:0fda7ff0a60c0250939fc25361c2d4e6e7853c31c996733fdd5a1dd760bcb824" | ||
| url = "https://github.com/lycheeverse/lychee/releases/download/lychee-v0.23.0/lychee-x86_64-windows.exe" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| [settings] | ||
| lockfile = true | ||
|
|
||
| [tools] | ||
| lychee = "latest" | ||
|
|
||
| [tasks.check-links] | ||
| description = "Check links with lychee" | ||
| run = "lychee --no-progress './**/*.md' './**/*.rst' './maxminddb/**/*.py' './pyproject.toml'" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Setting `max_redirects = 0` will cause the link checker to fail on any HTTP redirect (such as `301 Moved Permanently` or `302 Found`). While this is useful for ensuring all links are canonical, in practice, external websites frequently use redirects for path normalization, HTTPS enforcement, or localization. Failing the CI for these external redirects can make the build highly fragile and prone to failing due to changes outside of your control.

Consider allowing a small number of redirects (e.g., `3`) to make the CI more robust, or run redirect checks as a non-blocking warning-only workflow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Intentionally keeping `max_redirects = 0`. Surfacing moved/redirecting links is the goal of this change (STF-557), and it matches the dev-site and blog-site lychee configs; redirects are resolved by updating the link to its canonical target. — Claude (posted on Greg's behalf)