diff --git a/CHANGELOG.md b/CHANGELOG.md index 89ada7950..2f1cf7d97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - New `codegraph list` and `codegraph stop` commands for managing the background daemon. `codegraph list` (alias `ps`) shows every running CodeGraph daemon — project, pid, version, uptime — with `--json` for scripting. `codegraph stop` stops the daemon for the current project (or `codegraph stop `, or `codegraph stop --all` to stop every daemon on the machine). Previously the only way to shut a daemon down was to hunt for its pid and `kill` it by hand. (#845) - The CodeGraph MCP server now self-heals if its main thread ever locks up. A lightweight watchdog notices when the process has stopped responding and stops it so a fresh one starts on your next request — it can no longer sit pinned at 100% CPU with no way to recover. Tune the detection window with `CODEGRAPH_WATCHDOG_TIMEOUT_MS`, or turn it off entirely with `CODEGRAPH_NO_WATCHDOG=1`. (#850) +- CodeGraph now indexes **Magik** (`.magik`) — the language of the GE Smallworld platform, used in GIS and asset-management systems. Exemplar definitions (`define_slotted_exemplar`, `define_mixin`, and related forms) are extracted as class nodes, `_method` declarations as methods with their exemplar as the receiver type, named `_proc` blocks as functions, `_package` declarations as namespaces, and inline `##` docstrings are preserved. Call edges are tracked across methods and procedures. 
### Fixes diff --git a/README.md b/README.md index 09f1f4209..faf7f77a8 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 | | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes | | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config | -| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Svelte, Vue, Astro, Liquid, Pascal/Delphi | +| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Svelte, Vue, Astro, Liquid, Pascal/Delphi, Magik | | **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 17 frameworks | | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules | | **100% Local** | No data leaves your machine. No API keys. No external services. 
SQLite database only | @@ -673,6 +673,7 @@ is written): | Lua | `.lua` | Full support (functions, methods with receivers, local variables, `require` imports, call edges) | | R | `.R` `.r` | Full support (functions in every assignment form, S4/R5/R6 classes with methods, `library`/`require` imports, `source()` file references, call edges) | | Luau | `.luau` | Full support (everything in Lua, plus `type`/`export type` aliases, typed signatures, and Roblox instance-path `require`) | +| Magik | `.magik` | Full support (exemplars as classes, methods with receiver types, named procedures, inline `##` docstrings, call edges) | ## Measured cross-file coverage diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index df825f529..334e60102 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -7161,3 +7161,119 @@ GeomPoint <- ggproto("GeomPoint", Geom, }); }); }); + +// ─── Magik ────────────────────────────────────────────────────────────────── + +describe('Magik Extraction', () => { + describe('Language detection', () => { + it('detects .magik files', () => { + expect(detectLanguage('my_class.magik')).toBe('magik'); + }); + }); + + it('extracts an exemplar as a class node', () => { + const code = ` +_package sw + +define_slotted_exemplar(:my_class, +\t{:slot_a, _unset}, +\t{}) +$ +`; + const result = extractFromSource('my_class.magik', code); + const cls = result.nodes.find((n) => n.kind === 'class'); + expect(cls).toBeDefined(); + expect(cls?.name).toBe('my_class'); + // Namespace from _package declaration + const ns = result.nodes.find((n) => n.kind === 'namespace'); + expect(ns?.name).toBe('sw'); + expect(cls?.qualifiedName).toContain('my_class'); + }); + + it('extracts methods with exemplar receiver', () => { + const code = ` +_package sw + +_method my_class.my_method(x, y) +\t## Adds two numbers. 
+\t_local z << x + y +\t_return z +_endmethod +$ + +_method my_class.init(a_name) +\t_return _self +_endmethod +$ +`; + const result = extractFromSource('my_class.magik', code); + const methods = result.nodes.filter((n) => n.kind === 'method'); + expect(methods.length).toBe(2); + + const myMethod = methods.find((n) => n.name === 'my_method'); + expect(myMethod).toBeDefined(); + expect(myMethod?.qualifiedName).toBe('my_class::my_method'); + expect(myMethod?.signature).toBe('(x, y)'); + expect(myMethod?.docstring).toBe('Adds two numbers.'); + + const initMethod = methods.find((n) => n.name === 'init'); + expect(initMethod).toBeDefined(); + expect(initMethod?.qualifiedName).toBe('my_class::init'); + expect(initMethod?.signature).toBe('(a_name)'); + }); + + it('extracts a named procedure as a function', () => { + const code = ` +_package sw + +_proc @my_procedure(a, b) +\t## A standalone procedure. +\t_return a * b +_endproc +$ +`; + const result = extractFromSource('utils.magik', code); + const fn = result.nodes.find((n) => n.kind === 'function'); + expect(fn).toBeDefined(); + expect(fn?.name).toBe('my_procedure'); + expect(fn?.signature).toBe('(a, b)'); + expect(fn?.docstring).toBe('A standalone procedure.'); + expect(fn?.qualifiedName).toContain('my_procedure'); + }); + + it('extracts call edges from method bodies', () => { + const code = ` +_package sw + +_method my_class.another_method() +\t_return _self.my_method(1, 2) +_endmethod +$ +`; + const result = extractFromSource('my_class.magik', code); + const callRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'calls' && r.referenceName === 'my_method' + ); + expect(callRef).toBeDefined(); + }); + + it('extracts a private method', () => { + const code = ` +_package sw + +_method my_class.do_internal() +\t_return 42 +_endmethod +$ + +_private _method my_class.secret() +\t_return _self.do_internal() +_endmethod +$ +`; + const result = extractFromSource('my_class.magik', code); + const secretMethod = 
result.nodes.find((n) => n.name === 'secret'); + expect(secretMethod).toBeDefined(); + expect(secretMethod?.visibility).toBe('private'); + }); +}); diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index ef6307a92..fd7ad9947 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -39,6 +39,7 @@ const WASM_GRAMMAR_FILES: Record = { r: 'tree-sitter-r.wasm', luau: 'tree-sitter-luau.wasm', objc: 'tree-sitter-objc.wasm', + magik: 'tree-sitter-magik.wasm', }; /** @@ -108,6 +109,7 @@ export const EXTENSION_MAP: Record = { '.luau': 'luau', '.m': 'objc', '.mm': 'objc', + '.magik': 'magik', // XML: file-level tracking; the MyBatis extractor matches `` // shape and emits SQL-statement nodes (other XML returns empty). '.xml': 'xml', @@ -216,7 +218,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise> = { typescript: typescriptExtractor, @@ -51,4 +52,5 @@ export const EXTRACTORS: Partial> = { r: rExtractor, luau: luauExtractor, objc: objcExtractor, + magik: magikExtractor, }; diff --git a/src/extraction/languages/magik.ts b/src/extraction/languages/magik.ts new file mode 100644 index 000000000..ffb93e4da --- /dev/null +++ b/src/extraction/languages/magik.ts @@ -0,0 +1,274 @@ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { getNodeText } from '../tree-sitter-helpers'; +import type { LanguageExtractor } from '../tree-sitter-types'; + +/** + * Magik exemplar-definition functions that create class-like objects. + * These parse as `invoke` nodes with the function name in the `receiver` field + * and the exemplar name as a `:symbol` argument. + */ +const EXEMPLAR_DEFINERS: ReadonlySet = new Set([ + 'define_slotted_exemplar', + 'def_slotted_exemplar', + 'define_mixin', + 'def_mixin', + 'define_pseudo_slot_exemplar', + 'define_indexed_exemplar', +]); + +/** + * Magik receivers that refer to "self" — skip for call resolution since they + * don't aid name-based matching. 
+ */ +const SELF_RECEIVERS: ReadonlySet = new Set([ + '_self', '_super', '_clone', '_thisthread', +]); + +export const magikExtractor: LanguageExtractor = { + // Magik methods are `_method exemplar.name ... _endmethod` — top-level, not + // inside a class body. The exemplarname field provides the receiver type so + // getReceiverType can build the qualified name `exemplar::method_name`. + functionTypes: [], // procedures handled via visitNode + classTypes: [], // exemplars handled via visitNode + methodTypes: [], // methods handled via visitNode (for inline ## docstrings) + interfaceTypes: [], + structTypes: [], + enumTypes: [], + typeAliasTypes: [], + importTypes: [], // _import is block-scope variable sharing, not module imports + callTypes: ['invoke'], // standalone function invocations; `call` handled via visitNode/extractBareCall + variableTypes: [], // _local/_dynamic are function-body locals, not top-level symbols + nameField: 'name', // method: name field = the method name identifier + bodyField: '', // Magik has no explicit body field; resolveBody returns the node itself + paramsField: '', // arguments are unnamed children; getSignature collects them + + // Magik `_package sw` — creates a namespace wrapping all subsequent declarations + packageTypes: ['package'], + extractPackage: (node, source) => { + const text = getNodeText(node, source); + const m = text.match(/_package\s+(\S+)/i); + return m ? m[1]! : null; + }, + + // The `method` node's `exemplarname` field is the class/exemplar name + getReceiverType: (node, source) => { + if (node.type !== 'method') return undefined; + const exemplar = node.childForFieldName('exemplarname'); + return exemplar ? 
getNodeText(exemplar, source).trim() : undefined; + }, + + // Collect `argument` children for the signature string + getSignature: (node, source) => { + const args: string[] = []; + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'argument') args.push(getNodeText(child, source)); + } + return '(' + args.join(', ') + ')'; + }, + + // Check for `_private` keyword among anonymous children + getVisibility: (node) => { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && !child.isNamed && child.text.toLowerCase() === '_private') { + return 'private'; + } + } + return 'public'; + }, + + // Magik's body IS the method/procedure node — no separate body field. + // Returning the node itself causes visitFunctionBody to walk all children. + resolveBody: (node, _bodyField) => { + if (node.type === 'method' || node.type === 'procedure') return node; + return null; + }, + + // Handle `call` nodes inside function bodies: extract the `message` field + // as the callee name. The framework calls this for any node that is NOT in + // callTypes (so `invoke` nodes go through extractCall, `call` nodes come here). + extractBareCall: (node, source) => { + if (node.type !== 'call') return undefined; + const messageNode = node.childForFieldName('message'); + if (!messageNode) return undefined; + const methodName = getNodeText(messageNode, source).trim(); + if (!methodName) return undefined; + + const receiverNode = node.childForFieldName('receiver'); + if (receiverNode) { + const receiverText = getNodeText(receiverNode, source).trim(); + if (!SELF_RECEIVERS.has(receiverText) && + (receiverNode.type === 'variable' || receiverNode.type === 'identifier')) { + return `${receiverText}.${methodName}`; + } + } + return methodName; + }, + + visitNode: (node, ctx) => { + // ── Methods (_method exemplar.name(args) ... 
_endmethod) ──────────────── + // Handled here (not via methodTypes) so we can extract inline ## docstrings. + if (node.type === 'method') { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return true; + const name = getNodeText(nameNode, ctx.source).trim(); + if (!name) return true; + + const exemplarNode = node.childForFieldName('exemplarname'); + const exemplarName = exemplarNode ? getNodeText(exemplarNode, ctx.source).trim() : undefined; + + const docNode = node.namedChildren.find((c: SyntaxNode) => c.type === 'documentation'); + const docstring = docNode + ? getNodeText(docNode, ctx.source) + .split('\n') + .map((l: string) => l.replace(/^\s*##\s?/, '')) + .join('\n') + .trim() + : undefined; + + const args: string[] = []; + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'argument') args.push(getNodeText(child, ctx.source)); + } + + let visibility: 'public' | 'private' | undefined; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && !child.isNamed && child.text.toLowerCase() === '_private') { + visibility = 'private'; + break; + } + } + + const extra: Record = { + signature: '(' + args.join(', ') + ')', + docstring, + visibility: visibility ?? 
'public', + }; + if (exemplarName) { + extra.qualifiedName = `${exemplarName}::${name}`; + } + + const methodNode = ctx.createNode('method', name, node, extra); + if (methodNode) { + // Create contains edge from the exemplar class node if it exists + if (exemplarName) { + const ownerNode = ctx.nodes.find( + (n) => n.name === exemplarName && n.kind === 'class' && n.filePath === ctx.filePath + ); + if (ownerNode) { + ctx.addUnresolvedReference({ + fromNodeId: ownerNode.id, + referenceName: name, + referenceKind: 'references', + line: node.startPosition.row + 1, + column: node.startPosition.column, + }); + } + } + ctx.pushScope(methodNode.id); + ctx.visitFunctionBody(node, methodNode.id); + ctx.popScope(); + } + return true; + } + + // ── Procedures (_proc @name(args) ... _endproc) ───────────────────────── + if (node.type === 'procedure') { + const labelNode = node.namedChildren.find((c: SyntaxNode) => c.type === 'label'); + const name = labelNode + ? getNodeText(labelNode, ctx.source).replace(/^@\s*/, '').trim() + : ''; + + const docNode = node.namedChildren.find((c: SyntaxNode) => c.type === 'documentation'); + const docstring = docNode + ? 
getNodeText(docNode, ctx.source) + .split('\n') + .map((l: string) => l.replace(/^\s*##\s?/, '')) + .join('\n') + .trim() + : undefined; + + const args: string[] = []; + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'argument') args.push(getNodeText(child, ctx.source)); + } + + const fn = ctx.createNode('function', name, node, { + signature: '(' + args.join(', ') + ')', + docstring, + }); + if (fn) { + ctx.pushScope(fn.id); + ctx.visitFunctionBody(node, fn.id); + ctx.popScope(); + } + return true; + } + + // ── Exemplar definitions (define_slotted_exemplar, define_mixin, …) ───── + if (node.type === 'invoke') { + const receiverNode = node.childForFieldName('receiver'); + if (!receiverNode) return false; + const fnName = getNodeText(receiverNode, ctx.source).trim(); + + if (EXEMPLAR_DEFINERS.has(fnName)) { + // First :symbol argument is the exemplar (class) name + const symbolNode = node.namedChildren.find((c: SyntaxNode) => c.type === 'symbol'); + if (symbolNode) { + const rawName = getNodeText(symbolNode, ctx.source) + .replace(/^:/, '') + .replace(/^\||\|$/g, '') + .trim(); + if (rawName) { + ctx.createNode('class', rawName, node); + } + } + return true; // handled — don't also emit as a generic call + } + return false; // other invoke nodes → default extractCall path + } + + // ── Top-level call nodes (outside method/procedure bodies) ─────────────── + // In method/procedure bodies, `call` is handled via extractBareCall. + // At the fragment level (e.g. class initialisation code), handle here too. 
+ if (node.type === 'call') { + const callerId = ctx.nodeStack[ctx.nodeStack.length - 1]; + if (callerId) { + const messageNode = node.childForFieldName('message'); + if (messageNode) { + const methodName = getNodeText(messageNode, ctx.source).trim(); + if (methodName) { + const receiverNode = node.childForFieldName('receiver'); + let calleeName = methodName; + if (receiverNode) { + const receiverText = getNodeText(receiverNode, ctx.source).trim(); + if (!SELF_RECEIVERS.has(receiverText) && + (receiverNode.type === 'variable' || receiverNode.type === 'identifier')) { + calleeName = `${receiverText}.${methodName}`; + } + } + ctx.addUnresolvedReference({ + fromNodeId: callerId, + referenceName: calleeName, + referenceKind: 'calls', + line: node.startPosition.row + 1, + column: node.startPosition.column, + }); + } + } + } + // Visit children so nested calls inside arguments are captured + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child) ctx.visitNode(child); + } + return true; + } + + return false; + }, +}; diff --git a/src/extraction/wasm/tree-sitter-magik.wasm b/src/extraction/wasm/tree-sitter-magik.wasm new file mode 100755 index 000000000..21b300049 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-magik.wasm differ diff --git a/src/types.ts b/src/types.ts index 656bb1090..3bee2be48 100644 --- a/src/types.ts +++ b/src/types.ts @@ -91,6 +91,7 @@ export const LANGUAGES = [ 'luau', 'objc', 'r', + 'magik', 'yaml', 'twig', 'xml',