diff --git a/CHANGELOG.md b/CHANGELOG.md index c9703a785..9143adf26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Bug Fixes + +- **Linked git worktrees parked inside a repo are no longer indexed as separate projects.** When you keep `git worktree add` checkouts in a gitignored subdir (`.worktrees/`, `.claude/worktrees/`, …), the embedded-repo discovery (#514) treated each worktree as its own repo and re-indexed it — duplicating every symbol once per worktree (an N-worktree repo ballooned the index ~Nx and made `query`/`callers`/`impact` return N copies of each result). CodeGraph now detects a linked worktree (its `.git` is a file resolving into another repo's `worktrees/` admin dir) and skips it, since it's the same repo as the main checkout. Submodules and genuine embedded repos are unaffected. + ## [1.0.0] - 2026-06-12 diff --git a/__tests__/multi-repo-workspace.test.ts b/__tests__/multi-repo-workspace.test.ts index db1fba2a2..7bc3af6d8 100644 --- a/__tests__/multi-repo-workspace.test.ts +++ b/__tests__/multi-repo-workspace.test.ts @@ -64,6 +64,31 @@ describe('multi-repo workspaces (#514)', () => { expect(files).toContain('tools.ts'); // the parent's own tracked code still indexes }); + it('excludes linked git worktrees nested under the repo, but keeps genuine embedded repos', () => { + // Main repo with a tracked file; .worktrees/ is gitignored (the common + // place tools put linked worktrees). + write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n'); + write(path.join(ws, '.gitignore'), '/.worktrees/\n'); + makeRepo(ws); + + // A REAL linked worktree: its `.git` is a file pointing into + // ws/.git/worktrees/, and it holds a full copy of the SAME repo's + // tracked files — re-indexing it duplicates every symbol N times. 
+ git(ws, 'worktree', 'add', '-q', '-b', 'wt-branch', path.join(ws, '.worktrees/wt')); + + // A genuine independent repo under the same ignored dir MUST still be + // discovered (#514 must not regress). + write(path.join(ws, '.worktrees/real-embed/src/lib.ts'), 'export function lib() {}\n'); + makeRepo(path.join(ws, '.worktrees/real-embed')); + + const files = scanDirectory(ws); + expect(files).toContain('src/app.ts'); // the main checkout indexes normally + // the linked worktree's copy is NOT re-indexed (no symbol duplication) + expect(files.some((f) => f.startsWith('.worktrees/wt/'))).toBe(false); + // a real embedded repo in the same ignored dir is still picked up + expect(files).toContain('.worktrees/real-embed/src/lib.ts'); + }); + it('keeps respecting the parent .gitignore for the parent own (non-repo) dirs', () => { write(path.join(ws, 'scratch/junk.ts'), 'export function junk() { return 9; }\n'); write(path.join(ws, 'src/app.ts'), 'export function app() { return 1; }\n'); diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 9921c6fb6..8dd8968f3 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -280,12 +280,37 @@ const EMBEDDED_REPO_SEARCH_DEPTH = 4; /** Max directories examined per search — a huge ignored data dir must never stall a scan/sync. */ const EMBEDDED_REPO_SEARCH_ENTRIES = 2000; +/** + * True if `dotGitPath` is a *linked git worktree* pointer — a `.git` FILE whose + * `gitdir:` resolves into another repo's `worktrees/` admin dir. Such a + * worktree is a second checkout of an ALREADY-indexed repo (the main checkout), + * not an independent project, so the indexer must not re-walk it — doing so + * duplicates every symbol once per worktree. Workflows that park linked + * worktrees in a gitignored subdir (`.worktrees/`, `.claude/worktrees/`, …) are + * common, and the embedded-repo discovery (#514) would otherwise treat each as + * its own repo. 
/**
 * Report whether `dotGitPath` is the `.git` pointer file of a *linked git
 * worktree* (a checkout created with `git worktree add`).
 *
 * A linked worktree's `.git` is a regular FILE containing
 * `gitdir: <common-dir>/worktrees/<id>`; that private admin dir holds a
 * `commondir` file pointing back at the shared repository. Such a checkout is a
 * second copy of a repo's tracked files, so re-walking it duplicates every
 * symbol once per worktree.
 *
 * The check is deliberately stricter than "the path mentions `worktrees/`":
 *  - the pointer must END in `worktrees/<id>`, so a submodule
 *    (`…/modules/<name>`) or any repo that merely lives below a directory that
 *    happens to be called `worktrees` is not misclassified and silently dropped
 *    from the index;
 *  - when the admin dir is reachable it must contain `commondir`, which rules
 *    out a `--separate-git-dir` repository stored at `…/worktrees/<name>`.
 * A stale pointer (admin dir pruned or moved) cannot be confirmed on disk and is
 * still reported as a worktree based on the path shape alone.
 *
 * NOTE(review): the pointer is not compared against the repo being scanned, so
 * a worktree whose main checkout lives outside the indexed tree is reported as
 * a linked worktree as well — confirm callers are fine with skipping it.
 *
 * @param dotGitPath Path of the `.git` entry to inspect.
 * @returns `true` only for a linked-worktree pointer file; `false` for a `.git`
 *          directory, a submodule pointer, a missing/unreadable entry, or a
 *          file without a `gitdir:` line.
 */
function isLinkedWorktree(dotGitPath: string): boolean {
  try {
    // lstat (not stat): a `.git` directory or symlink is never a worktree pointer.
    if (!fs.lstatSync(dotGitPath).isFile()) return false;

    const pointer = /^gitdir:\s*(.+)$/m.exec(fs.readFileSync(dotGitPath, 'utf-8'));
    const gitDir = pointer?.[1]?.trim();
    if (!gitDir) return false;

    // The admin dir is always `<common-dir>/worktrees/<id>`: the parent segment
    // must be `worktrees`. Both separators are accepted so Windows-style
    // pointers are recognised regardless of the host platform.
    if (!/(?:^|[\\/])worktrees[\\/][^\\/]+[\\/]*$/.test(gitDir)) return false;

    // A relative `gitdir:` is relative to the directory holding the `.git` file.
    const adminDir = path.resolve(path.dirname(dotGitPath), gitDir);
    if (fs.existsSync(adminDir)) {
      // Reachable admin dir: `commondir` is what marks it as a linked worktree.
      return fs.existsSync(path.join(adminDir, 'commondir'));
    }

    // Admin dir not reachable (stale worktree): fall back to the path shape.
    return true;
  } catch {
    // Best-effort probe: unreadable or vanished entries are "not a worktree".
    return false;
  }
}