diff --git a/CHANGELOG.md b/CHANGELOG.md index a3252b7..26f7ed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- Neo4j-backed TypeScript analysis backend (`cldk.analysis.typescript.neo4j.TSNeo4jBackend`). It + is a drop-in alternative to the in-memory `TSCodeanalyzer`: it answers the **same** `get_*` + query surface (call graph, callers/callees, class hierarchy, call sites, decorators, symbol + lookups, ...) by running **Cypher over a live Neo4j graph** instead of walking the pydantic / + NetworkX structures. The graph is the one `codeanalyzer-typescript` emits with `--emit neo4j` + (schema `schema.neo4j.json`); the backend can populate the database for you over Bolt, or query + one that is already loaded. +- `TypeScriptAnalysis` / `CLDK(language="typescript").analysis(...)` now accept an optional + `neo4j_config` (`Neo4jConnectionConfig`) to select the Neo4j backend; without it the in-memory + backend is used, unchanged. +- Optional `neo4j` extra (`pip install cldk[neo4j]`) for the Neo4j Python driver. + ## [v1.0.7] - 2026-02-14 ### Added diff --git a/cldk/analysis/java/backend.py b/cldk/analysis/java/backend.py new file mode 100644 index 0000000..b969b72 --- /dev/null +++ b/cldk/analysis/java/backend.py @@ -0,0 +1,213 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""The Java analysis backend contract. + +:class:`JavaAnalysis` is a (mostly) thin façade that delegates its static-analysis queries to a +*backend*. Today the only backend is :class:`~cldk.analysis.java.codeanalyzer.JCodeanalyzer` +(in-memory pydantic / NetworkX over the codeanalyzer JSON); this ABC formalizes the surface the +façade depends on so an alternative backend (e.g. a forthcoming Neo4j/Cypher backend, mirroring +the TypeScript :class:`~cldk.analysis.typescript.neo4j.TSNeo4jBackend`) can be dropped in and +selected without touching the façade. + +The contract is enforced by the type system and at instantiation time rather than matching only by +convention. Note the façade also calls Tree-sitter directly for a few parsing/sanitization helpers +(e.g. ``is_parsable``, ``get_raw_ast``); those are not part of the backend contract — only the +analysis queries the façade routes through ``self.backend`` are. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Dict, List, Tuple, Union + +import networkx as nx + +from cldk.models.java.models import ( + JApplication, + JCallable, + JCallableParameter, + JComment, + JCompilationUnit, + JCRUDOperation, + JField, + JMethodDetail, + JType, +) + +# A CRUD query row: the owning type + callable and the operations found within it. 
+CRUDRow = Dict[str, Union[JType, JCallable, List[JCRUDOperation]]] + + +class JavaAnalysisBackend(ABC): + """Abstract base every Java analysis backend implements. + + A backend owns all indexing and query logic for a Java application (symbol table, call graph, + class/method/field navigation, entry points, CRUD operations, comments/docstrings); the + :class:`JavaAnalysis` façade delegates to it. Implementations must return the canonical + ``cldk.models.java`` pydantic objects (or the documented NetworkX / dict / list shapes) so + backends are behaviorally interchangeable. + """ + + # -----[ application / whole-program ]----- + @abstractmethod + def get_application_view(self) -> JApplication: + """The whole application view.""" + + @abstractmethod + def get_symbol_table(self) -> Dict[str, JCompilationUnit]: + """The per-file symbol table, keyed by file path.""" + + @abstractmethod + def get_compilation_units(self) -> List[JCompilationUnit]: + """All compilation units.""" + + @abstractmethod + def get_java_file(self, qualified_class_name: str) -> str: + """The file path declaring a class.""" + + @abstractmethod + def get_java_compilation_unit(self, file_path: str) -> JCompilationUnit: + """The compilation unit for a file path.""" + + # -----[ call graph ]----- + @abstractmethod + def get_call_graph(self) -> nx.DiGraph: + """NetworkX DiGraph of the application's call edges.""" + + @abstractmethod + def get_call_graph_json(self) -> str: + """The call graph serialized as JSON.""" + + @abstractmethod + def get_all_callers(self, target_class_name: str, target_method_signature: str, using_symbol_table: bool) -> Dict: + """Callers of a method.""" + + @abstractmethod + def get_all_callees(self, source_class_name: str, source_method_signature: str, using_symbol_table: bool) -> Dict: + """Callees of a method.""" + + @abstractmethod + def get_class_call_graph(self, qualified_class_name: str, method_name: str | None = None) -> List[Tuple[JMethodDetail, JMethodDetail]]: + 
"""Call-graph edges reachable from a class (or one of its methods).""" + + @abstractmethod + def get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> List[Tuple[JMethodDetail, JMethodDetail]]: + """Call-graph edges reachable from a class, computed from the symbol table only.""" + + # -----[ classes / methods / fields ]----- + @abstractmethod + def get_all_classes(self) -> Dict[str, JType]: + """Every class, keyed by qualified name.""" + + @abstractmethod + def get_class(self, qualified_class_name: str) -> JType: + """A single class by qualified name.""" + + @abstractmethod + def get_all_sub_classes(self, qualified_class_name: str) -> Dict[str, JType]: + """Classes that extend/implement the given class.""" + + @abstractmethod + def get_all_nested_classes(self, qualified_class_name: str) -> List[JType]: + """The classes declared inside a class.""" + + @abstractmethod + def get_extended_classes(self, qualified_class_name: str) -> List[str]: + """The base classes a class extends.""" + + @abstractmethod + def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]: + """The interfaces a class implements.""" + + @abstractmethod + def get_all_methods_in_application(self) -> Dict[str, Dict[str, JCallable]]: + """All methods grouped by their owning class qualified name.""" + + @abstractmethod + def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, JCallable]: + """The methods of a class.""" + + @abstractmethod + def get_method(self, qualified_class_name: str, method_signature: str) -> JCallable: + """A single method of a class.""" + + @abstractmethod + def get_method_parameters(self, qualified_class_name: str, method_signature: str) -> List[JCallableParameter]: + """The parameters of a method.""" + + @abstractmethod + def get_all_constructors(self, qualified_class_name: str) -> Dict[str, JCallable]: + """The constructors of a class.""" + + @abstractmethod + def 
get_all_fields(self, qualified_class_name: str) -> List[JField]: + """The fields of a class.""" + + # -----[ entry points ]----- + @abstractmethod + def get_all_entry_point_methods(self) -> Dict[str, Dict[str, JCallable]]: + """Methods identified as application entry points.""" + + @abstractmethod + def get_all_entry_point_classes(self) -> Dict[str, JType]: + """Classes identified as application entry points.""" + + # -----[ CRUD operations ]----- + @abstractmethod + def get_all_crud_operations(self) -> List[CRUDRow]: + """All CRUD operations across the application.""" + + @abstractmethod + def get_all_create_operations(self) -> List[CRUDRow]: + """All create operations.""" + + @abstractmethod + def get_all_read_operations(self) -> List[CRUDRow]: + """All read operations.""" + + @abstractmethod + def get_all_update_operations(self) -> List[CRUDRow]: + """All update operations.""" + + @abstractmethod + def get_all_delete_operations(self) -> List[CRUDRow]: + """All delete operations.""" + + # -----[ comments / docstrings ]----- + @abstractmethod + def get_all_comments(self) -> Dict[str, List[JComment]]: + """All comments across the application, keyed by file.""" + + @abstractmethod + def get_comment_in_file(self, file_path: str) -> List[JComment]: + """The comments in a file.""" + + @abstractmethod + def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]: + """The comments in a class.""" + + @abstractmethod + def get_comments_in_a_method(self, qualified_class_name: str, method_signature: str) -> List[JComment]: + """The comments in a method.""" + + @abstractmethod + def get_all_docstrings(self) -> List[Tuple[str, JComment]]: + """All docstring-style comments across the application.""" + + @abstractmethod + def remove_all_comments(self, src_code: str) -> str: + """Strip all comments from the given source code.""" diff --git a/cldk/analysis/java/codeanalyzer/codeanalyzer.py b/cldk/analysis/java/codeanalyzer/codeanalyzer.py index d1348b5..3810b7e 
100644 --- a/cldk/analysis/java/codeanalyzer/codeanalyzer.py +++ b/cldk/analysis/java/codeanalyzer/codeanalyzer.py @@ -28,6 +28,7 @@ import networkx as nx from cldk.analysis import AnalysisLevel +from cldk.analysis.java.backend import JavaAnalysisBackend from cldk.analysis.commons.treesitter import TreesitterJava from cldk.models.java import JGraphEdges from cldk.models.java.enums import CRUDOperationType @@ -37,7 +38,7 @@ logger = logging.getLogger(__name__) -class JCodeanalyzer: +class JCodeanalyzer(JavaAnalysisBackend): """A class for building the application view of a Java application using Codeanalyzer. Args: diff --git a/cldk/analysis/java/java_analysis.py b/cldk/analysis/java/java_analysis.py index 91ef5e1..6348dc9 100644 --- a/cldk/analysis/java/java_analysis.py +++ b/cldk/analysis/java/java_analysis.py @@ -57,6 +57,7 @@ from cldk.models.java import JApplication from cldk.models.java.models import JCRUDOperation, JComment, JCompilationUnit, JMethodDetail, JType, JField from cldk.analysis.java.codeanalyzer import JCodeanalyzer +from cldk.analysis.java.backend import JavaAnalysisBackend class JavaAnalysis: @@ -154,7 +155,7 @@ def __init__( self.target_files = target_files self.treesitter_java: TreesitterJava = TreesitterJava() # Initialize the analysis analysis_backend - self.backend: JCodeanalyzer = JCodeanalyzer( + self.backend: JavaAnalysisBackend = JCodeanalyzer( project_dir=self.project_dir, source_code=self.source_code, eager_analysis=self.eager_analysis, diff --git a/cldk/analysis/python/backend.py b/cldk/analysis/python/backend.py new file mode 100644 index 0000000..46c6262 --- /dev/null +++ b/cldk/analysis/python/backend.py @@ -0,0 +1,141 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""The Python analysis backend contract. + +:class:`PythonAnalysis` is a thin façade that delegates every query to a *backend*. Today the only +backend is :class:`~cldk.analysis.python.codeanalyzer.PyCodeanalyzer` (in-memory pydantic / +NetworkX over ``analysis.json``); this ABC formalizes the surface the façade depends on so an +alternative backend (e.g. a forthcoming Neo4j/Cypher backend, mirroring the TypeScript +:class:`~cldk.analysis.typescript.neo4j.TSNeo4jBackend`) can be dropped in and selected without +touching the façade. + +The contract is enforced by the type system and at instantiation time rather than matching only +by convention. Backend-specific lifecycle (caches, drivers) is intentionally not part of it. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Dict, List, Tuple + +import networkx as nx + +from cldk.models.python import ( + PyApplication, + PyCallable, + PyClass, + PyClassAttribute, + PyModule, +) + + +class PythonAnalysisBackend(ABC): + """Abstract base every Python analysis backend implements. + + A backend owns all indexing and query logic for a Python application; the + :class:`PythonAnalysis` façade is a one-line-delegation shim over it. Implementations must + return the canonical ``cldk.models.python`` pydantic objects (or the documented + NetworkX / dict / list shapes) so backends are behaviorally interchangeable. 
+ """ + + # -----[ application / whole-program ]----- + @abstractmethod + def get_application_view(self) -> PyApplication: + """The whole application view (symbol table + call graph).""" + + @abstractmethod + def get_symbol_table(self) -> Dict[str, PyModule]: + """The per-file symbol table, keyed by module file path.""" + + @abstractmethod + def get_modules(self) -> List[PyModule]: + """All modules.""" + + @abstractmethod + def get_python_module(self, file_path: str) -> PyModule | None: + """The module for a file path.""" + + @abstractmethod + def get_python_file(self, qualified_class_name: str) -> str | None: + """The file path declaring the given symbol.""" + + # -----[ call graph ]----- + @abstractmethod + def get_call_graph(self) -> nx.DiGraph: + """NetworkX DiGraph of the application's call edges.""" + + @abstractmethod + def get_call_graph_json(self) -> str: + """The application serialized as JSON.""" + + @abstractmethod + def get_all_callers(self, target_class_name: str, target_method_declaration: str) -> Dict: + """Callers of a method, with the connecting call-graph edge metadata.""" + + @abstractmethod + def get_all_callees(self, source_class_name: str, source_method_declaration: str) -> Dict: + """Callees of a method, with the connecting call-graph edge metadata.""" + + @abstractmethod + def get_class_call_graph(self, qualified_class_name: str, method_signature: str | None = None) -> List[Tuple[str, str]]: + """Call-graph edges reachable from a class (or one of its methods).""" + + # -----[ classes ]----- + @abstractmethod + def get_all_classes(self) -> Dict[str, PyClass]: + """Every class, keyed by signature.""" + + @abstractmethod + def get_class(self, qualified_class_name: str) -> PyClass | None: + """A single class by signature.""" + + @abstractmethod + def get_all_nested_classes(self, qualified_class_name: str) -> List[PyClass]: + """The classes declared inside a class.""" + + @abstractmethod + def get_all_sub_classes(self, qualified_class_name: str) 
-> Dict[str, PyClass]: + """Classes that extend the given class.""" + + @abstractmethod + def get_extended_classes(self, qualified_class_name: str) -> List[str]: + """The base types a class extends.""" + + # -----[ methods / fields ]----- + @abstractmethod + def get_all_methods_in_application(self) -> Dict[str, Dict[str, PyCallable]]: + """All methods grouped by their owning class signature.""" + + @abstractmethod + def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, PyCallable]: + """The methods of a class.""" + + @abstractmethod + def get_method(self, qualified_class_name: str, qualified_method_name: str) -> PyCallable | None: + """A single method of a class.""" + + @abstractmethod + def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[str]: + """The parameter names of a method.""" + + @abstractmethod + def get_all_constructors(self, qualified_class_name: str) -> Dict[str, PyCallable]: + """The constructors of a class.""" + + @abstractmethod + def get_all_fields(self, qualified_class_name: str) -> List[PyClassAttribute]: + """The attributes/fields of a class.""" diff --git a/cldk/analysis/python/codeanalyzer/codeanalyzer.py b/cldk/analysis/python/codeanalyzer/codeanalyzer.py index 7104711..c5fd4f3 100644 --- a/cldk/analysis/python/codeanalyzer/codeanalyzer.py +++ b/cldk/analysis/python/codeanalyzer/codeanalyzer.py @@ -61,6 +61,7 @@ from codeanalyzer.schema import model_dump_json from cldk.analysis import AnalysisLevel +from cldk.analysis.python.backend import PythonAnalysisBackend from cldk.models.python import ( PyApplication, PyCallEdge, @@ -74,7 +75,7 @@ logger = logging.getLogger(__name__) -class PyCodeanalyzer: +class PyCodeanalyzer(PythonAnalysisBackend): """In-process driver for the ``codeanalyzer-python`` analysis backend. 
This class serves as the primary interface to the codeanalyzer-python diff --git a/cldk/analysis/python/python_analysis.py b/cldk/analysis/python/python_analysis.py index 3f20665..c77c79a 100644 --- a/cldk/analysis/python/python_analysis.py +++ b/cldk/analysis/python/python_analysis.py @@ -54,6 +54,7 @@ from tree_sitter import Tree from cldk.analysis.commons.treesitter import TreesitterPython +from cldk.analysis.python.backend import PythonAnalysisBackend from cldk.analysis.python.codeanalyzer import PyCodeanalyzer from cldk.models.python import ( PyApplication, @@ -162,7 +163,7 @@ def __init__( self.eager_analysis = eager_analysis self.target_files = target_files self.treesitter_python: TreesitterPython = TreesitterPython() - self.backend: PyCodeanalyzer = PyCodeanalyzer( + self.backend: PythonAnalysisBackend = PyCodeanalyzer( project_dir=project_dir, analysis_level=analysis_level, analysis_json_path=analysis_json_path, diff --git a/cldk/analysis/typescript/__init__.py b/cldk/analysis/typescript/__init__.py new file mode 100644 index 0000000..c4ba1d9 --- /dev/null +++ b/cldk/analysis/typescript/__init__.py @@ -0,0 +1,19 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +from cldk.analysis.typescript.typescript_analysis import TypeScriptAnalysis + +__all__ = ["TypeScriptAnalysis"] diff --git a/cldk/analysis/typescript/backend.py b/cldk/analysis/typescript/backend.py new file mode 100644 index 0000000..9f3a275 --- /dev/null +++ b/cldk/analysis/typescript/backend.py @@ -0,0 +1,237 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""The TypeScript analysis backend contract. + +:class:`TypeScriptAnalysis` is a thin façade that delegates every query to a *backend*. Two +interchangeable backends exist: + +* :class:`~cldk.analysis.typescript.codeanalyzer.TSCodeanalyzer` — walks the in-memory pydantic + ``TSApplication`` / a NetworkX call graph built from ``analysis.json``; +* :class:`~cldk.analysis.typescript.neo4j.TSNeo4jBackend` — answers the *same* queries with + Cypher over the graph ``codeanalyzer-typescript`` emits with ``--emit neo4j``. + +This ABC formalizes the surface those two share so the façade↔backend relationship is enforced by +the type system (and at instantiation time) instead of matching only by convention. Both backends +subclass it; the façade is typed against it. Backend-specific lifecycle (e.g. 
the Neo4j driver's +``close()`` / context-manager support) is intentionally *not* part of the contract. + +The vocabulary mirrors :class:`~cldk.analysis.java.codeanalyzer.JCodeanalyzer` / +:class:`~cldk.analysis.python.codeanalyzer.PyCodeanalyzer`, but the node kinds are TypeScript-native +(interfaces, type aliases, enums, namespaces, decorators, ...). +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Dict, List, Set, Tuple + +import networkx as nx + +from cldk.models.typescript import ( + TSApplication, + TSCallable, + TSCallsite, + TSClass, + TSClassAttribute, + TSDecorator, + TSEnum, + TSEnumMember, + TSExport, + TSExternalSymbol, + TSImport, + TSInterface, + TSModule, + TSTypeAlias, + TSVariableDeclaration, +) + + +class TSAnalysisBackend(ABC): + """Abstract base every TypeScript analysis backend implements. + + A backend owns *all* indexing and query logic for a TypeScript application; the + :class:`TypeScriptAnalysis` façade is a one-line-delegation shim over it. Implementations must + return the canonical ``cldk.models.typescript`` pydantic objects (or the documented + NetworkX / dict / list shapes) so the two backends are behaviorally interchangeable. 
+ """ + + # -----[ application / whole-program ]----- + @abstractmethod + def get_application(self) -> TSApplication: + """The whole application view (symbol table + call graph + external symbols).""" + + @abstractmethod + def get_symbol_table(self) -> Dict[str, TSModule]: + """The per-file symbol table, keyed by module file path.""" + + @abstractmethod + def get_modules(self) -> List[TSModule]: + """All modules (compilation units).""" + + @abstractmethod + def get_external_symbols(self) -> Dict[str, TSExternalSymbol]: + """Phantom (external) call targets — imported/required library members.""" + + @abstractmethod + def get_typescript_file(self, qualified_name: str) -> str | None: + """The file path declaring the symbol with the given signature.""" + + @abstractmethod + def get_typescript_module(self, file_path: str) -> TSModule | None: + """The module for a file path.""" + + # -----[ call graph ]----- + @abstractmethod + def get_call_graph(self) -> nx.DiGraph: + """NetworkX DiGraph of callable signatures (and phantom external symbols) + call edges.""" + + @abstractmethod + def get_call_graph_json(self) -> str: + """The application serialized as JSON.""" + + @abstractmethod + def get_all_callers(self, target_class_name: str, target_method_declaration: str | None = None) -> Dict: + """Callers of a method, with the connecting call-graph edge metadata.""" + + @abstractmethod + def get_all_callees(self, source_class_name: str, source_method_declaration: str | None = None) -> Dict: + """Callees of a method, with the connecting call-graph edge metadata.""" + + @abstractmethod + def get_class_call_graph(self, qualified_class_name: str, method_signature: str | None = None) -> List[Tuple[str, str]]: + """Call-graph edges reachable from a class (or one of its methods).""" + + @abstractmethod + def get_class_hierarchy(self) -> nx.DiGraph: + """Inheritance/implementation graph: an edge child → base for every base class.""" + + # -----[ call sites ]----- + @abstractmethod + def 
get_call_sites(self, qualified_callable_name: str) -> List[TSCallsite]: + """The rich, syntactic call sites inside a callable.""" + + @abstractmethod + def get_calling_lines(self, target_signature: str) -> List[int]: + """Sorted source lines anywhere in the project where ``target_signature`` is invoked.""" + + @abstractmethod + def get_call_targets(self, source_signature: str) -> Set[str]: + """The call targets invoked from a callable, derived from its call sites.""" + + # -----[ classes / interfaces / enums / type-aliases ]----- + @abstractmethod + def get_all_classes(self) -> Dict[str, TSClass]: + """Every class, keyed by signature.""" + + @abstractmethod + def get_class(self, qualified_class_name: str) -> TSClass | None: + """A single class by signature.""" + + @abstractmethod + def get_all_interfaces(self) -> Dict[str, TSInterface]: + """Every interface, keyed by signature.""" + + @abstractmethod + def get_all_enums(self) -> Dict[str, TSEnum]: + """Every enum, keyed by signature.""" + + @abstractmethod + def get_enum_members(self, qualified_enum_name: str) -> List[TSEnumMember]: + """The members of an enum.""" + + @abstractmethod + def get_all_type_aliases(self) -> Dict[str, TSTypeAlias]: + """Every type alias, keyed by signature.""" + + @abstractmethod + def get_all_nested_classes(self, qualified_class_name: str) -> List[TSClass]: + """The classes declared inside a class.""" + + @abstractmethod + def get_all_sub_classes(self, qualified_class_name: str) -> Dict[str, TSClass]: + """Classes that extend/implement the given class.""" + + @abstractmethod + def get_extended_classes(self, qualified_class_name: str) -> List[str]: + """The base types a class extends (base classes minus implemented interfaces).""" + + @abstractmethod + def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]: + """The interfaces a class implements.""" + + # -----[ methods / functions / fields ]----- + @abstractmethod + def get_all_methods_in_application(self) -> 
Dict[str, Dict[str, TSCallable]]: + """All methods grouped by their owning class/interface signature.""" + + @abstractmethod + def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, TSCallable]: + """The methods of a class/interface, keyed by short name.""" + + @abstractmethod + def get_method(self, qualified_class_name: str, qualified_method_name: str) -> TSCallable | None: + """A single method of a class/interface.""" + + @abstractmethod + def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[str]: + """The parameter names of a method.""" + + @abstractmethod + def get_all_constructors(self, qualified_class_name: str) -> Dict[str, TSCallable]: + """The constructors of a class.""" + + @abstractmethod + def get_all_functions(self) -> Dict[str, TSCallable]: + """Top-level (module/namespace) functions, keyed by signature.""" + + @abstractmethod + def get_all_fields(self, qualified_class_name: str) -> List[TSClassAttribute]: + """The attributes/fields of a class.""" + + @abstractmethod + def get_interface_properties(self, qualified_interface_name: str) -> List[TSClassAttribute]: + """The properties of an interface.""" + + # -----[ imports / exports / variables ]----- + @abstractmethod + def get_imports(self) -> Dict[str, List[TSImport]]: + """Per-file import bindings.""" + + @abstractmethod + def get_all_exports(self) -> Dict[str, List[TSExport]]: + """Per-file export bindings.""" + + @abstractmethod + def get_all_variables(self) -> Dict[str, List[TSVariableDeclaration]]: + """Per-file module-level variable declarations.""" + + # -----[ decorators ]----- + @abstractmethod + def get_decorators(self, qualified_callable_name: str) -> List[TSDecorator]: + """Structured decorators applied to a callable.""" + + @abstractmethod + def get_class_decorators(self, qualified_class_name: str) -> List[TSDecorator]: + """Structured decorators applied to a class.""" + + @abstractmethod + def get_methods_with_decorators(self, 
decorators: List[str]) -> Dict[str, List[str]]: + """Map each requested decorator name to the signatures of callables carrying it.""" + + @abstractmethod + def get_classes_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]: + """Map each requested decorator name to the signatures of classes carrying it.""" diff --git a/cldk/analysis/typescript/codeanalyzer/__init__.py b/cldk/analysis/typescript/codeanalyzer/__init__.py new file mode 100644 index 0000000..e402c4f --- /dev/null +++ b/cldk/analysis/typescript/codeanalyzer/__init__.py @@ -0,0 +1,19 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from .codeanalyzer import TSCodeanalyzer + +__all__ = ["TSCodeanalyzer"] diff --git a/cldk/analysis/typescript/codeanalyzer/bin/README.md b/cldk/analysis/typescript/codeanalyzer/bin/README.md new file mode 100644 index 0000000..18e5cbf --- /dev/null +++ b/cldk/analysis/typescript/codeanalyzer/bin/README.md @@ -0,0 +1,16 @@ +# codeanalyzer-typescript binary + +This directory is the optional drop location for the compiled `codeanalyzer-typescript` +backend binary (built from the `codeanalyzer-ts` repo with `bun build --compile`). + +The SDK wrapper (`cldk/analysis/typescript/codeanalyzer/codeanalyzer.py`) resolves the binary in +this order: + +1. 
`analysis_backend_path=` passed to `CLDK("typescript").analysis(...)` (that directory is searched recursively for `codeanalyzer-typescript*`, then `codeanalyzer-ts*`). +2. `$CODEANALYZER_TS_BIN` environment variable. +3. A binary named `codeanalyzer-typescript*` placed in **this** directory (bundled in the wheel). + +The binary is platform-specific and ~70 MB, so it is **not** committed to the repo. Build it +with `bun build ./src/index.ts --compile --outfile dist/codeanalyzer-typescript` and copy it +here (or point `analysis_backend_path` at the directory containing it). The pinned version is recorded under +`[tool.backend-versions] codeanalyzer-typescript` in `pyproject.toml`. diff --git a/cldk/analysis/typescript/codeanalyzer/bin/__init__.py b/cldk/analysis/typescript/codeanalyzer/bin/__init__.py new file mode 100644 index 0000000..3840d6d --- /dev/null +++ b/cldk/analysis/typescript/codeanalyzer/bin/__init__.py @@ -0,0 +1 @@ +# Package marker so the optional bundled binary is resolvable via importlib.resources. diff --git a/cldk/analysis/typescript/codeanalyzer/codeanalyzer.py b/cldk/analysis/typescript/codeanalyzer/codeanalyzer.py new file mode 100644 index 0000000..bc1cb83 --- /dev/null +++ b/cldk/analysis/typescript/codeanalyzer/codeanalyzer.py @@ -0,0 +1,537 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""TypeScript Codeanalyzer backend wrapper. 
+
+Subprocess wrapper around the ``codeanalyzer-typescript`` binary (built from ``codeanalyzer-ts``
+with ``bun build --compile``). Mirrors the Java ``JCodeanalyzer`` / Python ``PyCodeanalyzer``
+pattern: shell out to the analyzer, read ``analysis.json`` from stdout (or an output dir),
+validate it into a ``TSApplication`` pydantic model, **and own all query/indexing logic**. The
+``TypeScriptAnalysis`` facade is a thin delegating shell over this backend.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shlex
+import subprocess
+from collections import deque
+from importlib import resources
+from pathlib import Path
+from subprocess import CompletedProcess
+from typing import Dict, List, Set, Tuple, Union
+
+import networkx as nx
+
+from cldk.analysis import AnalysisLevel
+from cldk.analysis.typescript.backend import TSAnalysisBackend
+from cldk.models.typescript import (
+    TSApplication,
+    TSCallable,
+    TSCallsite,
+    TSClass,
+    TSClassAttribute,
+    TSDecorator,
+    TSEnum,
+    TSEnumMember,
+    TSExport,
+    TSExternalSymbol,
+    TSImport,
+    TSInterface,
+    TSModule,
+    TSNamespace,
+    TSTypeAlias,
+    TSVariableDeclaration,
+)
+from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException
+
+logger = logging.getLogger(__name__)
+
+
+class TSCodeanalyzer(TSAnalysisBackend):
+    """Build and query the application view of a TypeScript project by invoking the
+    codeanalyzer-typescript binary as a subprocess.
+
+    This backend owns all indexing and query logic (symbol lookups, the NetworkX call graph,
+    class hierarchy, call sites, entrypoints, decorators, ...). The :class:`TypeScriptAnalysis`
+    facade simply delegates to it, mirroring how :class:`PythonAnalysis` delegates to
+    :class:`PyCodeanalyzer`.
+
+    Args:
+        project_dir: Path to the root of the TypeScript project.
+        analysis_backend_path: Path to the ``codeanalyzer-typescript`` binary, or a directory
+            containing it (searched recursively). If None, falls back to ``$CODEANALYZER_TS_BIN``,
+            then the ``codeanalyzer_typescript`` package, then a bundled binary.
+        analysis_json_path: Directory to persist ``analysis.json``. If None, output is read from
+            the subprocess stdout pipe.
+        analysis_level: ``AnalysisLevel.symbol_table`` (1) or ``AnalysisLevel.call_graph`` (2).
+        eager_analysis: If True, re-run the analyzer even if a cached ``analysis.json`` exists.
+        target_files: Restrict analysis to these files (incremental).
+    """
+
+    def __init__(
+        self,
+        project_dir: Union[str, Path],
+        analysis_backend_path: Union[str, Path, None],
+        analysis_json_path: Union[str, Path, None],
+        analysis_level: str,
+        eager_analysis: bool,
+        target_files: List[str] | None,
+    ) -> None:
+        """Store the settings, run the analyzer, and build the signature-keyed indexes."""
+        self.project_dir = project_dir
+        self.analysis_backend_path = analysis_backend_path
+        self.analysis_json_path = analysis_json_path
+        self.analysis_level = analysis_level
+        self.eager_analysis = eager_analysis
+        self.target_files = target_files
+        # Anything other than ``symbol_table`` is passed to the binary as level 2.
+        self.application: TSApplication = self._init_codeanalyzer(
+            analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2
+        )
+        # Built lazily by get_call_graph().
+        self._call_graph: nx.DiGraph | None = None
+        self._index()
+
+    # -----[ binary resolution ]-----
+    def _get_codeanalyzer_exec(self) -> List[str]:
+        """Resolve the codeanalyzer-typescript executable command.
+
+        Resolution order: ``analysis_backend_path`` (the binary itself, or a directory that is
+        searched recursively), ``$CODEANALYZER_TS_BIN`` (shell-split, so it may carry extra
+        arguments), the ``codeanalyzer_typescript`` package, then a file whose name starts with
+        ``codeanalyzer`` in the bundled ``bin`` package.
+
+        Returns:
+            The command as an argv list, ready to be extended with analyzer flags.
+
+        Raises:
+            CodeanalyzerExecutionException: If no binary can be located.
+        """
+        if self.analysis_backend_path:
+            backend = Path(self.analysis_backend_path)
+            logger.info(f"Using codeanalyzer-typescript from {backend}")
+            # Accept a path straight to the binary: the recursive search below only makes sense
+            # for a directory, so a file path used to end in a misleading "not found" error.
+            if backend.is_file():
+                return [str(backend)]
+            binary = next(
+                (p for p in backend.rglob("codeanalyzer-typescript*") if p.is_file()),
+                None,
+            ) or next((p for p in backend.rglob("codeanalyzer-ts*") if p.is_file()), None)
+            if binary is None:
+                raise CodeanalyzerExecutionException(
+                    "codeanalyzer-typescript binary not found in the provided analysis_backend_path."
+                )
+            return [str(binary)]
+
+        env_bin = os.environ.get("CODEANALYZER_TS_BIN")
+        if env_bin:
+            return shlex.split(env_bin)
+
+        # Prebuilt binary from the `codeanalyzer-typescript` PyPI package (platform wheel),
+        # mirroring how the Python backend depends on `codeanalyzer-python`.
+        try:
+            import codeanalyzer_typescript
+
+            return [str(codeanalyzer_typescript.bin_path())]
+        except (ModuleNotFoundError, FileNotFoundError):
+            pass
+
+        # Bundled binary (if shipped with the wheel).
+        try:
+            with resources.as_file(resources.files("cldk.analysis.typescript.codeanalyzer.bin")) as bin_dir:
+                binary = next((p for p in bin_dir.iterdir() if p.is_file() and p.name.startswith("codeanalyzer")), None)
+                if binary is not None:
+                    return [str(binary)]
+        except (ModuleNotFoundError, FileNotFoundError):
+            pass
+
+        raise CodeanalyzerExecutionException(
+            "codeanalyzer-typescript binary not found. Pass analysis_backend_path=, set $CODEANALYZER_TS_BIN, or bundle it under cldk/analysis/typescript/codeanalyzer/bin/."
+        )
+
+    @staticmethod
+    def _init_tsapplication(data: str) -> TSApplication:
+        """Build a TSApplication from a stringified analysis.json."""
+        return TSApplication(**json.loads(data))
+
+    @staticmethod
+    def _describe_failure(error: subprocess.CalledProcessError) -> str:
+        """Render a failed analyzer run, appending the captured stderr when there is any."""
+        stderr = (error.stderr or "").strip()
+        return f"{error}\n{stderr}" if stderr else str(error)
+
+    def _init_codeanalyzer(self, analysis_level: int = 1) -> TSApplication:
+        """Run the analyzer and return the validated TSApplication.
+
+        With no ``analysis_json_path`` the JSON is read from the analyzer's stdout. Otherwise the
+        analyzer writes ``analysis.json`` into that directory and the file is read back; the run
+        is skipped when the file already exists, ``eager_analysis`` is False and no
+        ``target_files`` were given.
+
+        Args:
+            analysis_level: Numeric level handed to the binary's ``-a`` flag.
+
+        Returns:
+            The ``TSApplication`` built from the analyzer output.
+
+        Raises:
+            CodeanalyzerExecutionException: If the binary cannot be resolved or run, exits with a
+                non-zero status (the message then carries its stderr), or produces no
+                ``analysis.json``; in stdout mode also if the output cannot be parsed/validated.
+        """
+        codeanalyzer_exec = self._get_codeanalyzer_exec()
+        target_args: List[str] = []
+        for tf in self.target_files or []:
+            target_args += ["-t", str(tf).strip()]
+        base_args = codeanalyzer_exec + ["-i", str(Path(self.project_dir)), "-a", str(analysis_level)]
+
+        if self.analysis_json_path is None:
+            # Read compact JSON from the stdout pipe.
+            args = base_args + target_args
+            try:
+                logger.info(f"Running codeanalyzer-typescript: {' '.join(args)}")
+                console_out: CompletedProcess[str] = subprocess.run(
+                    args, capture_output=True, text=True, check=True
+                )
+                return self._init_tsapplication(console_out.stdout)
+            except subprocess.CalledProcessError as e:
+                # str(e) alone only reports the exit status; stderr says *why* it failed.
+                raise CodeanalyzerExecutionException(self._describe_failure(e)) from e
+            except Exception as e:  # noqa: BLE001
+                raise CodeanalyzerExecutionException(str(e)) from e
+
+        # Persist to an output directory and read analysis.json back.
+        analysis_json_file = Path(self.analysis_json_path).joinpath("analysis.json")
+        needs_run = self.eager_analysis or not analysis_json_file.exists() or bool(self.target_files)
+        if needs_run:
+            args = base_args + ["-o", str(self.analysis_json_path)] + target_args
+            try:
+                logger.info(f"Running codeanalyzer-typescript: {' '.join(args)}")
+                subprocess.run(args, capture_output=True, text=True, check=True)
+            except subprocess.CalledProcessError as e:
+                raise CodeanalyzerExecutionException(self._describe_failure(e)) from e
+            except Exception as e:  # noqa: BLE001
+                raise CodeanalyzerExecutionException(str(e)) from e
+            # Checked outside the try so this error surfaces as-is instead of being re-wrapped.
+            if not analysis_json_file.exists():
+                raise CodeanalyzerExecutionException("codeanalyzer-typescript did not generate analysis.json.")
+        with open(analysis_json_file, encoding="utf-8") as f:
+            # The file already holds serialized JSON; parsing and re-dumping it first is wasted work.
+            return self._init_tsapplication(f.read())
+
+    # -----[ indexing ]-----
+    def _index(self) -> None:
+        """Flatten the (recursive) symbol table into signature-keyed lookups, built once."""
+        self._classes: Dict[str, TSClass] = {}
+        self._interfaces: Dict[str, TSInterface] = {}
+        self._enums: Dict[str, TSEnum] = {}
+        self._type_aliases: Dict[str, TSTypeAlias] = {}
+        self._callables: Dict[str, TSCallable] = {}
+        self._functions: Dict[str, TSCallable] = {}
+        self._methods_by_class: Dict[str, Dict[str, TSCallable]] = {}
+        # signature -> symbol-table key of the module that declares it
+        self._file_of: Dict[str, str] = {}
+
+        for fp, mod in self.application.symbol_table.items():
+            for f in mod.functions.values():
+                self._add_callable(f, fp)
+                self._functions[f.signature] = f
+            for cl in mod.classes.values():
+                self._add_class(cl, fp)
+            for it in mod.interfaces.values():
+                self._add_interface(it, fp)
+            for en in mod.enums.values():
+                self._enums[en.signature] = en
+                self._file_of[en.signature] = fp
+            for ta in mod.type_aliases.values():
+                self._type_aliases[ta.signature] = ta
+                self._file_of[ta.signature] = fp
+            for ns in mod.namespaces.values():
+                self._add_namespace(ns, fp)
+
+    def _add_callable(self, c: TSCallable, fp: str) -> None:
+        """Index a callable, then recurse into its inner callables and inner classes."""
+        self._callables[c.signature] = c
+        self._file_of[c.signature] = fp
+        for ic in c.inner_callables.values():
+            self._add_callable(ic, fp)
+        for cl in c.inner_classes.values():
+            self._add_class(cl, fp)
+
+    def _add_class(self, cl: TSClass, fp: str) -> None:
+        """Index a class, its methods (grouped under the class signature) and its inner classes."""
+        self._classes[cl.signature] = cl
+        self._file_of[cl.signature] = fp
+        methods: Dict[str, TSCallable] = {}
+        for m in cl.methods.values():
+            self._add_callable(m, fp)
+            # Keyed by short name: if several members share a name, the last one wins here
+            # (each is still reachable by signature through ``_callables``).
+            methods[m.name] = m
+        self._methods_by_class[cl.signature] = methods
+        for ic in cl.inner_classes.values():
+            self._add_class(ic, fp)
+
+    def _add_interface(self, it: TSInterface, fp: str) -> None:
+        """Index an interface and its methods (grouped under the interface signature)."""
+        self._interfaces[it.signature] = it
+        self._file_of[it.signature] = fp
+        methods: Dict[str, TSCallable] = {}
+        for m in it.methods.values():
+            self._add_callable(m, fp)
+            methods[m.name] = m
+        self._methods_by_class[it.signature] = methods
+
+    def _add_namespace(self, ns: TSNamespace, fp: str) -> None:
+        """Index everything declared in a namespace, recursing into nested namespaces."""
+        for f in ns.functions.values():
+            self._add_callable(f, fp)
+            self._functions[f.signature] = f
+        for cl in ns.classes.values():
+            self._add_class(cl, fp)
+        for it in ns.interfaces.values():
+            self._add_interface(it, fp)
+        for en in ns.enums.values():
+            self._enums[en.signature] = en
+            self._file_of[en.signature] = fp
+        for ta in ns.type_aliases.values():
+            self._type_aliases[ta.signature] = ta
+            self._file_of[ta.signature] = fp
+        for n in ns.namespaces.values():
+            self._add_namespace(n, fp)
+
+    def _resolve_callable(self, class_or_module: str, method: str | None = None) -> TSCallable | None:
+        """Resolve a callable from either a full signature (``method is None``) or a
+        ``(class/module, member)`` pair. Mirrors :meth:`PyCodeanalyzer.get_method` resolution."""
+        if method is None:
+            return self._callables.get(class_or_module)
+        # Method grouped under a class/interface signature. That mapping is keyed by the member's
+        # short name, so this single lookup also covers "by short name within the class".
+        members = self._methods_by_class.get(class_or_module, {})
+        if method in members:
+            return members[method]
+        # module/namespace-level function addressed as "<owner>.<member>"
+        return self._callables.get(f"{class_or_module}.{method}")
+
+    def _resolve_signature(self, class_or_sig: str, member: str | None = None) -> str:
+        """Resolve a ``(class/module, member)`` pair (or a bare signature) to a signature string.
+        Falls back to the composed/literal string so external (phantom) targets still match."""
+        if member is None:
+            return class_or_sig
+        callable_ = self._resolve_callable(class_or_sig, member)
+        return callable_.signature if callable_ else f"{class_or_sig}.{member}"
+
+    # -----[ application / whole-program ]-----
+    def get_application(self) -> TSApplication:
+        """Return the ``TSApplication`` built from the analyzer output."""
+        return self.application
+
+    def get_symbol_table(self) -> Dict[str, TSModule]:
+        """Return the application's symbol table (one ``TSModule`` per key)."""
+        return self.application.symbol_table
+
+    def get_modules(self) -> List[TSModule]:
+        """Return every module in the symbol table as a list."""
+        return list(self.application.symbol_table.values())
+
+    def get_external_symbols(self) -> Dict[str, TSExternalSymbol]:
+        """Return the application's external symbols."""
+        return self.application.external_symbols
+
+    def get_typescript_file(self, qualified_name: str) -> str | None:
+        """Return the symbol-table key under which a signature was indexed, or None if unknown."""
+        return self._file_of.get(qualified_name)
+
+    def get_typescript_module(self, file_path: str) -> TSModule | None:
+        """Return the module stored under ``file_path`` in the symbol table, or None."""
+        return self.application.symbol_table.get(file_path)
+
+    # -----[ call graph ]-----
+    def get_call_graph(self) -> nx.DiGraph:
+        """Build (and cache) a NetworkX DiGraph whose nodes are callable signatures (and phantom
+        external symbols) and whose edges are the identity-only call edges."""
+        if self._call_graph is not None:
+            return self._call_graph
+        graph = nx.DiGraph()
+        for sig, callable_ in self._callables.items():
+            graph.add_node(sig, callable=callable_, external=False)
+        # Phantom (external) nodes so that import-attributed edges don't dangle.
+        for sig, ext in self.application.external_symbols.items():
+            graph.add_node(sig, external=True, module=ext.module, name=ext.name)
+        for edge in self.application.call_graph:
+            graph.add_edge(
+                edge.source,
+                edge.target,
+                type=edge.type,
+                weight=edge.weight,
+                provenance=edge.provenance,
+                tags=edge.tags,
+            )
+        self._call_graph = graph
+        return graph
+
+    def get_call_graph_json(self) -> str:
+        """Return ``model_dump_json()`` of the whole application model (not only its edges)."""
+        return self.application.model_dump_json()
+
+    def get_all_callers(self, target_class_name: str, target_method_declaration: str | None = None) -> Dict:
+        """Callers of a method, with the connecting edge metadata. Mirrors
+        :meth:`PyCodeanalyzer.get_all_callers`. Pass a bare signature as the first argument and
+        leave ``target_method_declaration`` as ``None`` for module-level / already-resolved
+        callables and external (phantom) targets."""
+        graph = self.get_call_graph()
+        target = self._resolve_signature(target_class_name, target_method_declaration)
+        if target not in graph:
+            return {"target_method": target, "caller_details": []}
+        callers = [
+            {"caller_signature": src, "edge": graph.get_edge_data(src, target)}
+            for src in graph.predecessors(target)
+        ]
+        return {"target_method": target, "caller_details": callers}
+
+    def get_all_callees(self, source_class_name: str, source_method_declaration: str | None = None) -> Dict:
+        """Callees of a method, with the connecting edge metadata. Mirrors
+        :meth:`PyCodeanalyzer.get_all_callees`."""
+        graph = self.get_call_graph()
+        source = self._resolve_signature(source_class_name, source_method_declaration)
+        if source not in graph:
+            return {"source_method": source, "callee_details": []}
+        callees = [
+            {"callee_signature": tgt, "edge": graph.get_edge_data(source, tgt)}
+            for tgt in graph.successors(source)
+        ]
+        return {"source_method": source, "callee_details": callees}
+
+    def get_class_call_graph(
+        self, qualified_class_name: str, method_signature: str | None = None
+    ) -> List[Tuple[str, str]]:
+        """Call-graph edges reachable from a class (or one of its methods)."""
+        adjacency: Dict[str, List[str]] = {}
+        for e in self.application.call_graph:
+            adjacency.setdefault(e.source, []).append(e.target)
+        if method_signature is not None:
+            seeds = [method_signature]
+        else:
+            seeds = [m.signature for m in self._methods_by_class.get(qualified_class_name, {}).values()]
+        edges: List[Tuple[str, str]] = []
+        # Breadth-first walk; ``seen`` keeps a node from being expanded more than once.
+        seen = set(seeds)
+        queue = deque(seeds)
+        while queue:
+            src = queue.popleft()
+            for dst in adjacency.get(src, []):
+                edges.append((src, dst))
+                if dst not in seen:
+                    seen.add(dst)
+                    queue.append(dst)
+        return edges
+
+    def get_class_hierarchy(self) -> nx.DiGraph:
+        """Inheritance/implementation graph: an edge child → base for every base_class."""
+        graph = nx.DiGraph()
+        for sig in list(self._classes) + list(self._interfaces):
+            graph.add_node(sig)
+        for sig, cl in self._classes.items():
+            for base in cl.base_classes:
+                graph.add_edge(sig, base)
+        for sig, it in self._interfaces.items():
+            for base in it.base_classes:
+                graph.add_edge(sig, base)
+        return graph
+
+    # -----[ call sites ]-----
+    def get_call_sites(self, qualified_callable_name: str) -> List[TSCallsite]:
+        """The rich, syntactic call sites *inside* a callable (receiver/argument types, resolved
+        ``callee_signature``, position). Distinct from the resolved call-graph edges."""
+        callable_ = self._callables.get(qualified_callable_name)
+        return list(callable_.call_sites) if callable_ else []
+
+    def get_calling_lines(self, target_signature: str) -> List[int]:
+        """Sorted, de-duplicated source lines anywhere in the project where ``target_signature``
+        is invoked (matched against each call site's resolved ``callee_signature``)."""
+        lines: Set[int] = set()
+        for callable_ in self._callables.values():
+            for cs in callable_.call_sites:
+                if cs.callee_signature == target_signature and cs.start_line >= 0:
+                    lines.add(cs.start_line)
+        return sorted(lines)
+
+    def get_call_targets(self, source_signature: str) -> Set[str]:
+        """The set of call targets invoked from a callable, taken from its call sites. Resolved
+        ``callee_signature`` when available, otherwise the bare ``method_name``."""
+        callable_ = self._callables.get(source_signature)
+        if callable_ is None:
+            return set()
+        return {cs.callee_signature or cs.method_name for cs in callable_.call_sites}
+
+    # -----[ classes / interfaces / enums / type-aliases ]-----
+    def get_all_classes(self) -> Dict[str, TSClass]:
+        """Return every indexed class, keyed by signature (the live index, not a copy)."""
+        return self._classes
+
+    def get_class(self, qualified_class_name: str) -> TSClass | None:
+        """Return the class with this signature, or None."""
+        return self._classes.get(qualified_class_name)
+
+    def get_all_interfaces(self) -> Dict[str, TSInterface]:
+        """Return every indexed interface, keyed by signature (the live index, not a copy)."""
+        return self._interfaces
+
+    def get_all_enums(self) -> Dict[str, TSEnum]:
+        """Return every indexed enum, keyed by signature (the live index, not a copy)."""
+        return self._enums
+
+    def get_enum_members(self, qualified_enum_name: str) -> List[TSEnumMember]:
+        """Return the members of an enum, or an empty list if the enum is unknown."""
+        enum = self._enums.get(qualified_enum_name)
+        return list(enum.members) if enum else []
+
+    def get_all_type_aliases(self) -> Dict[str, TSTypeAlias]:
+        """Return every indexed type alias, keyed by signature (the live index, not a copy)."""
+        return self._type_aliases
+
+    def get_all_nested_classes(self, qualified_class_name: str) -> List[TSClass]:
+        """Return the classes declared directly inside a class (empty if the class is unknown)."""
+        cls = self._classes.get(qualified_class_name)
+        return list(cls.inner_classes.values()) if cls else []
+
+    def get_all_sub_classes(self, qualified_class_name: str) -> Dict[str, TSClass]:
+        """Return the classes that list ``qualified_class_name`` among their ``base_classes``."""
+        return {sig: cls for sig, cls in self._classes.items() if qualified_class_name in cls.base_classes}
+
+    def get_extended_classes(self, qualified_class_name: str) -> List[str]:
+        """Return a class's ``base_classes`` minus anything also listed in ``implements_types``."""
+        cls = self._classes.get(qualified_class_name)
+        if not cls:
+            return []
+        return [b for b in cls.base_classes if b not in cls.implements_types]
+
+    def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]:
+        """Return a class's ``implements_types`` (empty if the class is unknown)."""
+        cls = self._classes.get(qualified_class_name)
+        return list(cls.implements_types) if cls else []
+
+    # -----[ methods / functions / fields ]-----
+    def get_all_methods_in_application(self) -> Dict[str, Dict[str, TSCallable]]:
+        """Return methods grouped by owning class/interface signature, then by method name."""
+        return self._methods_by_class
+
+    def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, TSCallable]:
+        """Return the name-keyed methods of a class/interface (empty dict if unknown)."""
+        return self._methods_by_class.get(qualified_class_name, {})
+
+    def get_method(self, qualified_class_name: str, qualified_method_name: str) -> TSCallable | None:
+        """Return one method of a class/interface looked up by its short name, or None."""
+        return self._methods_by_class.get(qualified_class_name, {}).get(qualified_method_name)
+
+    def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[str]:
+        """Return the parameter names of a method (empty if the method is not found)."""
+        method = self.get_method(qualified_class_name, qualified_method_name)
+        return [p.name for p in method.parameters] if method else []
+
+    def get_all_constructors(self, qualified_class_name: str) -> Dict[str, TSCallable]:
+        """Return the methods of a class whose ``kind`` is ``"constructor"``, keyed by name."""
+        return {
+            name: m
+            for name, m in self._methods_by_class.get(qualified_class_name, {}).items()
+            if m.kind == "constructor"
+        }
+
+    def get_all_functions(self) -> Dict[str, TSCallable]:
+        """Return module- and namespace-level functions, keyed by signature."""
+        return self._functions
+
+    def get_all_fields(self, qualified_class_name: str) -> List[TSClassAttribute]:
+        """Return the attributes of a class (empty if the class is unknown)."""
+        cls = self._classes.get(qualified_class_name)
+        return list(cls.attributes.values()) if cls else []
+
+    def get_interface_properties(self, qualified_interface_name: str) -> List[TSClassAttribute]:
+        """Return the properties of an interface (empty if the interface is unknown)."""
+        it = self._interfaces.get(qualified_interface_name)
+        return list(it.properties.values()) if it else []
+
+    # -----[ imports / exports / variables ]-----
+    def get_imports(self) -> Dict[str, List[TSImport]]:
+        """Imports per symbol-table key."""
+        return {fp: list(m.imports) for fp, m in self.application.symbol_table.items()}
+
+    def get_all_exports(self) -> Dict[str, List[TSExport]]:
+        """Exports per symbol-table key."""
+        return {fp: list(m.exports) for fp, m in self.application.symbol_table.items()}
+
+    def get_all_variables(self) -> Dict[str, List[TSVariableDeclaration]]:
+        """Module-level variable declarations per file."""
+        return {fp: list(m.variables) for fp, m in self.application.symbol_table.items()}
+
+    # -----[ decorators ]-----
+    def get_decorators(self, qualified_callable_name: str) -> List[TSDecorator]:
+        """Return the decorators on a callable (empty if the callable is unknown)."""
+        callable_ = self._callables.get(qualified_callable_name)
+        return list(callable_.decorators) if callable_ else []
+
+    def get_class_decorators(self, qualified_class_name: str) -> List[TSDecorator]:
+        """Return the decorators on a class (empty if the class is unknown)."""
+        cls = self._classes.get(qualified_class_name)
+        return list(cls.decorators) if cls else []
+
+    def get_methods_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]:
+        """Map each requested decorator name to the signatures of callables carrying it."""
+        wanted = set(decorators)
+        result: Dict[str, List[str]] = {d: [] for d in decorators}
+        for sig, c in self._callables.items():
+            for dec in c.decorators:
+                if dec.name in wanted:
+                    result[dec.name].append(sig)
+        return result
+
+    def get_classes_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]:
+        """Map each requested decorator name to the signatures of classes carrying it."""
+        wanted = set(decorators)
+        result: Dict[str, List[str]] = {d: [] for d in decorators}
+        for sig, cls in self._classes.items():
+            for dec in cls.decorators:
+                if dec.name in wanted:
+                    result[dec.name].append(sig)
+        return result
diff --git a/cldk/analysis/typescript/neo4j/__init__.py b/cldk/analysis/typescript/neo4j/__init__.py
new file mode 100644
index 0000000..f43aaee
--- /dev/null
+++ b/cldk/analysis/typescript/neo4j/__init__.py
@@ -0,0 +1,22 @@
+################################################################################
+# Copyright IBM
Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Neo4j-backed TypeScript analysis backend (Cypher queries over the codeanalyzer-ts graph).""" + +from cldk.analysis.typescript.neo4j.config import Neo4jConnectionConfig +from cldk.analysis.typescript.neo4j.neo4j_backend import TSNeo4jBackend + +__all__ = ["TSNeo4jBackend", "Neo4jConnectionConfig"] diff --git a/cldk/analysis/typescript/neo4j/config.py b/cldk/analysis/typescript/neo4j/config.py new file mode 100644 index 0000000..d568a6c --- /dev/null +++ b/cldk/analysis/typescript/neo4j/config.py @@ -0,0 +1,46 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################
+
+"""Connection settings for the Neo4j-backed TypeScript analysis backend."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class Neo4jConnectionConfig:
+    """How :class:`TSNeo4jBackend` reaches (and, optionally, populates) the graph database.
+
+    Attributes:
+        uri: Bolt URI of the Neo4j server (e.g. ``bolt://localhost:7687``).
+        username: Neo4j username.
+        password: Neo4j password. Left out of ``repr()`` so that printing or logging a config
+            object does not disclose it.
+        database: Database name (None ⇒ server default).
+        application_name: The ``:Application`` anchor name to scope queries to. Defaults to the
+            analyzed project directory's name (matching ``codeanalyzer-typescript``'s
+            ``--app-name`` default).
+        build_db: If True (default), populate the database from the project on construction by
+            running ``codeanalyzer-typescript --emit neo4j``. Set False to query a DB that is
+            already loaded.
+    """
+
+    uri: str
+    username: str = "neo4j"
+    # repr=False keeps the secret out of the dataclass-generated __repr__; equality, the
+    # constructor signature and the default value are unaffected.
+    password: str = field(default="neo4j", repr=False)
+    database: str | None = None
+    application_name: str | None = None
+    build_db: bool = True
diff --git a/cldk/analysis/typescript/neo4j/neo4j_backend.py b/cldk/analysis/typescript/neo4j/neo4j_backend.py
new file mode 100644
index 0000000..7afd83c
--- /dev/null
+++ b/cldk/analysis/typescript/neo4j/neo4j_backend.py
@@ -0,0 +1,781 @@
+################################################################################
+# Copyright IBM Corporation 2026
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Neo4j-backed TypeScript analysis backend. + +A drop-in alternative to :class:`TSCodeanalyzer`: it exposes the **same query +method surface** (``get_all_classes``, ``get_call_graph``, ``get_all_callers``, +...) so the :class:`TypeScriptAnalysis` facade can delegate to either one, but +every method answers by running **Cypher over a live Neo4j graph** instead of +walking the in-memory pydantic/NetworkX structures. + +The graph is the one ``codeanalyzer-typescript`` emits with ``--emit neo4j`` +(schema: ``codeanalyzer-ts/schema.neo4j.json``). On construction this backend can +populate the database for you by shelling out to the analyzer binary with +``--emit neo4j --neo4j-uri ...`` (mirroring how :class:`TSCodeanalyzer` shells +out to produce ``analysis.json``); or you can point it at an already-loaded DB +with ``build_db=False``. + +Identity model (must match the in-memory backend): + +* a callable/class/interface/enum/type-alias is a ``:Symbol`` keyed by ``signature``; +* call-graph edges are ``(:Symbol)-[:CALLS]->(:Symbol|:External)``; +* every project-owned node carries a ``_module`` provenance property, so a single + database may hold several applications — all queries here are scoped to this + backend's application by the set of its module ``file_key``s. + +Parity caveats (inherent to what the projection stores, not bugs): + +* ``CALLS`` edge ``tags`` only round-trip the three keys the projection keeps + (``ts.dispatch`` / ``ts.external`` / ``ts.module``); +* ``get_imports`` / ``get_all_exports`` are reconstructed from the *aggregated* + ``IMPORTS`` / ``RE_EXPORTS`` edges (individual bindings, aliases and positions + are not stored); +* comments collapse to a single docstring, type-parameters keep only their names. 
+""" + +from __future__ import annotations + +import logging +import os +import shlex +import subprocess +from collections import deque +from importlib import resources +from pathlib import Path +from typing import Any, Dict, List, Set, Tuple, Union + +import networkx as nx + +from cldk.analysis import AnalysisLevel +from cldk.analysis.typescript.backend import TSAnalysisBackend +from cldk.analysis.typescript.neo4j import reconstruct as R +from cldk.models.typescript import ( + TSApplication, + TSCallable, + TSCallEdge, + TSCallsite, + TSClass, + TSClassAttribute, + TSDecorator, + TSEnum, + TSEnumMember, + TSExport, + TSExternalSymbol, + TSImport, + TSInterface, + TSModule, + TSTypeAlias, + TSVariableDeclaration, +) +from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException + +logger = logging.getLogger(__name__) + + +class TSNeo4jBackend(TSAnalysisBackend): + """Build and query the application view of a TypeScript project over Neo4j (Cypher). + + Args: + project_dir: Root of the TypeScript project (required when ``build_db`` is True). + analysis_backend_path: Directory containing the ``codeanalyzer-typescript`` binary. If + None, falls back to ``$CODEANALYZER_TS_BIN``, then the ``codeanalyzer_typescript`` + wheel, then a bundled binary. + analysis_level: ``AnalysisLevel.symbol_table`` (1) or ``AnalysisLevel.call_graph`` (2). + eager_analysis: If True, force a clean rebuild of the graph even if this application's + ``:Application`` anchor already exists in the database. + target_files: Restrict analysis to these files (incremental push). + neo4j_uri: Bolt URI of the Neo4j server (e.g. ``bolt://localhost:7687``). + neo4j_username / neo4j_password: Credentials. + neo4j_database: Database name (None ⇒ server default). + application_name: The ``:Application`` anchor name. Defaults to the project directory + name, matching ``codeanalyzer-typescript``'s ``--app-name`` default. 
+        build_db: If True (default), populate the database from ``project_dir`` on construction.
+            If False, query an already-loaded graph (``project_dir`` may be None).
+    """
+
+    def __init__(
+        self,
+        project_dir: Union[str, Path, None],
+        analysis_backend_path: Union[str, Path, None],
+        analysis_level: str,
+        eager_analysis: bool,
+        target_files: List[str] | None,
+        neo4j_uri: str,
+        neo4j_username: str,
+        neo4j_password: str,
+        neo4j_database: str | None = None,
+        application_name: str | None = None,
+        build_db: bool = True,
+    ) -> None:
+        """Open the Neo4j driver, optionally populate the graph, and load this app's module keys.
+
+        Raises:
+            CodeanalyzerExecutionException: If the ``neo4j`` driver is not installed, the
+                application name cannot be determined, ``project_dir`` is missing while
+                ``build_db`` is True, or populating the graph fails.
+        """
+        try:
+            from neo4j import GraphDatabase
+        except ModuleNotFoundError as e:  # pragma: no cover - import guard
+            raise CodeanalyzerExecutionException("The Neo4j backend requires the 'neo4j' driver. Install it with " "`pip install neo4j` (or `pip install cldk[neo4j]`).") from e
+
+        self.project_dir = project_dir
+        self.analysis_backend_path = analysis_backend_path
+        self.analysis_level = analysis_level
+        self.eager_analysis = eager_analysis
+        self.target_files = target_files
+        self.application_name = application_name or (Path(project_dir).name if project_dir else None)
+        if not self.application_name:
+            raise CodeanalyzerExecutionException("application_name could not be inferred; pass application_name explicitly when project_dir is None.")
+        # Validate arguments before the driver exists so a bad call cannot leave one open.
+        if build_db and project_dir is None:
+            raise CodeanalyzerExecutionException("project_dir is required when build_db=True.")
+        self._database = neo4j_database
+        self._driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))
+        self._neo4j_conn = (neo4j_uri, neo4j_username, neo4j_password)
+
+        try:
+            if build_db:
+                self._build_graph()
+
+            # The application's module file_keys, used to scope every query to this app.
+            self._modules: List[str] = self._load_module_keys()
+        except BaseException:
+            # The caller never receives the instance when construction fails, so nobody else
+            # could close the driver; release it here before propagating the error.
+            self._driver.close()
+            raise
+
+    # -----[ lifecycle ]-----
+    def close(self) -> None:
+        """Close the underlying Neo4j driver."""
+        self._driver.close()
+
+    def __enter__(self) -> "TSNeo4jBackend":
+        """Return the backend itself so it can be used in a ``with`` statement."""
+        return self
+
+    def __exit__(self, *exc: Any) -> None:
+        """Close the driver when the ``with`` block ends; exceptions are not suppressed."""
+        self.close()
+
+    def _run(self, query: str, **params: Any) -> List[Dict[str, Any]]:
+        """Run a Cypher statement and return the records as plain dicts (nodes/rels → prop maps)."""
+        with self._driver.session(database=self._database) as session:
+            return [record.data() for record in session.run(query, **params)]
+
+    def _load_module_keys(self) -> List[str]:
+        """Return the ``file_key`` of every ``:Module`` attached to this backend's ``:Application``."""
+        rows = self._run(
+            "MATCH (:Application {name: $app})-[:HAS_MODULE]->(m:Module) RETURN m.file_key AS k",
+            app=self.application_name,
+        )
+        return [r["k"] for r in rows]
+
+    # -----[ binary resolution + DB population ]-----
+    def _get_codeanalyzer_exec(self) -> List[str]:
+        """Resolve the codeanalyzer-typescript executable command (mirrors TSCodeanalyzer)."""
+        if self.analysis_backend_path:
+            backend = Path(self.analysis_backend_path)
+            # Accept a path straight to the binary: the recursive search below only makes sense
+            # for a directory, so a file path used to end in a misleading "not found" error.
+            if backend.is_file():
+                return [str(backend)]
+            binary = next(
+                (p for p in backend.rglob("codeanalyzer-typescript*") if p.is_file()),
+                None,
+            ) or next((p for p in backend.rglob("codeanalyzer-ts*") if p.is_file()), None)
+            if binary is None:
+                raise CodeanalyzerExecutionException("codeanalyzer-typescript binary not found in the provided analysis_backend_path.")
+            return [str(binary)]
+
+        env_bin = os.environ.get("CODEANALYZER_TS_BIN")
+        if env_bin:
+            # Shell-split so the variable may carry a command plus arguments.
+            return shlex.split(env_bin)
+
+        try:
+            import codeanalyzer_typescript
+
+            return [str(codeanalyzer_typescript.bin_path())]
+        except (ModuleNotFoundError, FileNotFoundError):
+            pass
+
+        try:
+            with resources.as_file(resources.files("cldk.analysis.typescript.codeanalyzer.bin")) as bin_dir:
+                binary = next((p for p in bin_dir.iterdir() if p.is_file() and p.name.startswith("codeanalyzer")), None)
+                if binary is not None:
+                    return [str(binary)]
+        except (ModuleNotFoundError, FileNotFoundError):
+            pass
+
+ raise CodeanalyzerExecutionException( + "codeanalyzer-typescript binary not found. Pass analysis_backend_path=, set $CODEANALYZER_TS_BIN, or bundle it under cldk/analysis/typescript/codeanalyzer/bin/." + ) + + def _build_graph(self) -> None: + """Push this project's graph into Neo4j via ``--emit neo4j --neo4j-uri`` (Bolt). + + Lazy by default: if the ``:Application`` anchor already exists and ``eager_analysis`` is + False (and this is not a targeted/incremental run), the push is skipped. + """ + if not self.eager_analysis and not self.target_files and self._application_exists(): + logger.info("Neo4j already has application '%s'; skipping rebuild (lazy).", self.application_name) + return + + uri, user, password = self._neo4j_conn + level = 1 if self.analysis_level == AnalysisLevel.symbol_table else 2 + args = self._get_codeanalyzer_exec() + [ + "-i", + str(Path(self.project_dir)), + "-a", + str(level), + "--emit", + "neo4j", + "--neo4j-uri", + uri, + "--neo4j-user", + user, + "--neo4j-password", + password, + "--app-name", + self.application_name, + ] + if self._database: + args += ["--neo4j-database", self._database] + if self.eager_analysis: + args += ["--eager"] + for tf in self.target_files or []: + args += ["-t", str(tf).strip()] + + try: + logger.info("Running codeanalyzer-typescript (neo4j emit): %s", " ".join(args)) + subprocess.run(args, capture_output=True, text=True, check=True) + except Exception as e: # noqa: BLE001 + raise CodeanalyzerExecutionException(str(e)) from e + + def _application_exists(self) -> bool: + rows = self._run("MATCH (a:Application {name: $app}) RETURN count(a) AS c", app=self.application_name) + return bool(rows and rows[0]["c"] > 0) + + # -----[ child-fetch helpers (reconstruction) ]----- + def _decorators_of(self, signature: str) -> List[TSDecorator]: + rows = self._run( + "MATCH (s:Symbol {signature: $sig})-[r:DECORATED_BY]->(d:Decorator) " "RETURN properties(d) AS node, properties(r) AS edge ORDER BY r.start_line", + 
sig=signature, + ) + return [R.decorator(r["node"], r["edge"]) for r in rows] + + def _attribute_decorators(self, attr_id: str) -> List[TSDecorator]: + rows = self._run( + "MATCH (a:Attribute {id: $id})-[r:DECORATED_BY]->(d:Decorator) " "RETURN properties(d) AS node, properties(r) AS edge ORDER BY r.start_line", + id=attr_id, + ) + return [R.decorator(r["node"], r["edge"]) for r in rows] + + def _callsites_of(self, signature: str) -> List[TSCallsite]: + rows = self._run( + "MATCH (c:Callable {signature: $sig})-[:HAS_CALLSITE]->(cs:CallSite) " "RETURN properties(cs) AS p ORDER BY cs.start_line, cs.start_column", + sig=signature, + ) + return [R.callsite(r["p"]) for r in rows] + + def _callable_full(self, props: Dict[str, Any]) -> TSCallable: + sig = props["signature"] + # Symbol-keyed containers are keyed by signature (matching the analyzer's dict keys). + inner_callables = {p["signature"]: self._callable_full(p) for p in self._children(sig, "DECLARES", "Callable")} + inner_classes = {p["signature"]: self._class_full(p) for p in self._children(sig, "DECLARES", "Class")} + return R.callable_( + props, + decorators=self._decorators_of(sig), + call_sites=self._callsites_of(sig), + inner_callables=inner_callables, + inner_classes=inner_classes, + ) + + @staticmethod + def _method_key(props: Dict[str, Any]) -> str: + """The class/interface ``methods`` dict key: ``sig`` for normal methods, ``sig#get`` / + ``sig#set`` for accessors (the analyzer disambiguates same-signature get/set this way).""" + sig = props["signature"] + accessor = props.get("accessor_kind") + if accessor == "getter": + return f"{sig}#get" + if accessor == "setter": + return f"{sig}#set" + return sig + + def _class_full(self, props: Dict[str, Any]) -> TSClass: + sig = props["signature"] + # methods keyed by the analyzer's method-key; inner_classes by signature; attributes by name. 
+ methods = {self._method_key(p): self._callable_full(p) for p in self._members(sig, "HAS_METHOD", "Callable")} + attributes: Dict[str, TSClassAttribute] = {} + for p in self._members(sig, "HAS_ATTRIBUTE", "Attribute"): + attributes[p["name"]] = R.attribute(p, self._attribute_decorators(p.get("id", ""))) + inner_classes = {p["signature"]: self._class_full(p) for p in self._children(sig, "DECLARES", "Class")} + return R.class_( + props, + decorators=self._decorators_of(sig), + methods=methods, + attributes=attributes, + inner_classes=inner_classes, + ) + + def _interface_full(self, props: Dict[str, Any]) -> TSInterface: + sig = props["signature"] + methods = {self._method_key(p): self._callable_full(p) for p in self._members(sig, "HAS_METHOD", "Callable")} + properties: Dict[str, TSClassAttribute] = {} + for p in self._members(sig, "HAS_ATTRIBUTE", "Attribute"): + properties[p["name"]] = R.attribute(p, self._attribute_decorators(p.get("id", ""))) + return R.interface(props, methods=methods, properties=properties) + + def _children(self, signature: str, rel: str, label: str) -> List[Dict[str, Any]]: + """Property maps of ``label`` nodes reached from a symbol via ``rel`` (one hop), in + declaration (source) order.""" + rows = self._run( + f"MATCH (s:Symbol {{signature: $sig}})-[:{rel}]->(n:{label}) " "RETURN properties(n) AS p ORDER BY n.start_line, n.name", + sig=signature, + ) + return [r["p"] for r in rows] + + def _members(self, signature: str, rel: str, label: str) -> List[Dict[str, Any]]: + """Property maps of member ``label`` nodes (methods/attributes), in declaration order.""" + rows = self._run( + f"MATCH (s:Symbol {{signature: $sig}})-[:{rel}]->(n:{label}) " "RETURN properties(n) AS p ORDER BY n.start_line, n.name", + sig=signature, + ) + return [r["p"] for r in rows] + + # -----[ application / whole-program ]----- + def get_application(self) -> TSApplication: + """Re-hydrate the whole :class:`TSApplication` (symbol table + call graph + externals).""" + 
return TSApplication( + symbol_table=self.get_symbol_table(), + call_graph=self._call_edges(), + external_symbols=self.get_external_symbols(), + ) + + def get_symbol_table(self) -> Dict[str, TSModule]: + modules: Dict[str, TSModule] = {} + for key in self._modules: + mod = self.get_typescript_module(key) + if mod is not None: + modules[key] = mod + return modules + + def get_modules(self) -> List[TSModule]: + return list(self.get_symbol_table().values()) + + def get_external_symbols(self) -> Dict[str, TSExternalSymbol]: + rows = self._run( + "MATCH (s:Symbol)-[:CALLS]->(e:External) WHERE s._module IN $mods " + "RETURN DISTINCT properties(e) AS p " + "UNION " + "MATCH (cs:CallSite)-[:RESOLVES_TO]->(e:External) WHERE cs._module IN $mods " + "RETURN DISTINCT properties(e) AS p", + mods=self._modules, + ) + return {r["p"]["signature"]: R.external(r["p"]) for r in rows} + + def get_typescript_file(self, qualified_name: str) -> str | None: + rows = self._run( + "MATCH (s:Symbol {signature: $sig}) WHERE s._module IN $mods RETURN s._module AS m LIMIT 1", + sig=qualified_name, + mods=self._modules, + ) + return rows[0]["m"] if rows else None + + def get_typescript_module(self, file_path: str) -> TSModule | None: + rows = self._run("MATCH (m:Module {file_key: $key}) RETURN properties(m) AS p", key=file_path) + if not rows: + return None + props = rows[0]["p"] + # All symbol containers are keyed by signature (matching the analyzer's dict keys). 
+ classes = {p["signature"]: self._class_full(p) for p in self._module_decls(file_path, "Class")} + interfaces = {p["signature"]: self._interface_full(p) for p in self._module_decls(file_path, "Interface")} + enums = {p["signature"]: R.enum(p) for p in self._module_decls(file_path, "Enum")} + type_aliases = {p["signature"]: R.type_alias(p) for p in self._module_decls(file_path, "TypeAlias")} + functions = {p["signature"]: self._callable_full(p) for p in self._module_decls(file_path, "Callable")} + namespaces = {p["signature"]: self._namespace_full(p) for p in self._module_decls(file_path, "Namespace")} + variables = self._module_variables(file_path) + imports = self._module_imports(file_path) + exports = self._module_exports(file_path) + return R.module( + props, + classes=classes, + interfaces=interfaces, + enums=enums, + type_aliases=type_aliases, + functions=functions, + namespaces=namespaces, + variables=variables, + imports=imports, + exports=exports, + ) + + def _module_decls(self, file_key: str, label: str) -> List[Dict[str, Any]]: + rows = self._run( + f"MATCH (m:Module {{file_key: $key}})-[:DECLARES]->(n:{label}) " "RETURN properties(n) AS p ORDER BY n.start_line, n.name", + key=file_key, + ) + return [r["p"] for r in rows] + + def _namespace_full(self, props: Dict[str, Any]): + sig = props["signature"] + classes = {p["signature"]: self._class_full(p) for p in self._children(sig, "DECLARES", "Class")} + interfaces = {p["signature"]: self._interface_full(p) for p in self._children(sig, "DECLARES", "Interface")} + enums = {p["signature"]: R.enum(p) for p in self._children(sig, "DECLARES", "Enum")} + type_aliases = {p["signature"]: R.type_alias(p) for p in self._children(sig, "DECLARES", "TypeAlias")} + functions = {p["signature"]: self._callable_full(p) for p in self._children(sig, "DECLARES", "Callable")} + namespaces = {p["signature"]: self._namespace_full(p) for p in self._children(sig, "DECLARES", "Namespace")} + rows = self._run( + "MATCH (s:Symbol 
{signature: $sig})-[:DECLARES_VAR]->(v:Variable) RETURN properties(v) AS p", + sig=sig, + ) + variables = [R.variable(r["p"]) for r in rows] + return R.namespace( + props, + classes=classes, + interfaces=interfaces, + enums=enums, + type_aliases=type_aliases, + functions=functions, + namespaces=namespaces, + variables=variables, + ) + + def _module_variables(self, file_key: str) -> List[TSVariableDeclaration]: + rows = self._run( + "MATCH (m:Module {file_key: $key})-[:DECLARES_VAR]->(v:Variable) RETURN properties(v) AS p", + key=file_key, + ) + return [R.variable(r["p"]) for r in rows] + + def _module_imports(self, file_key: str) -> List[TSImport]: + """Best-effort: synthesize one TSImport per imported name on each aggregated IMPORTS edge. + + The projection collapses every binding to a module-pair into a single edge carrying + ``imported_names`` / ``import_kinds`` / ``is_type_only``, so per-binding aliases, kinds + and positions are not recoverable. + """ + rows = self._run( + "MATCH (m:Module {file_key: $key})-[r:IMPORTS]->(t) " "RETURN coalesce(t.file_key, t.name) AS target, properties(r) AS edge", + key=file_key, + ) + out: List[TSImport] = [] + for r in rows: + edge = r["edge"] + kinds = edge.get("import_kinds", []) or [] + kind = kinds[0] if len(kinds) == 1 else "named" + type_only = edge.get("is_type_only", False) + names = edge.get("imported_names", []) or [] + if not names: + out.append(TSImport(module=r["target"], name="", import_kind=kind, is_type_only=type_only)) + for name in names: + out.append(TSImport(module=r["target"], name=name, import_kind=kind, is_type_only=type_only)) + return out + + def _module_exports(self, file_key: str) -> List[TSExport]: + """Best-effort: only re-exports survive as edges (local exports become ``is_exported`` props).""" + rows = self._run( + "MATCH (m:Module {file_key: $key})-[:RE_EXPORTS]->(t) " "RETURN coalesce(t.file_key, t.name) AS target", + key=file_key, + ) + return [TSExport(module=r["target"], name="*", 
export_kind="re_export") for r in rows] + + # -----[ call graph ]----- + def _call_edges(self) -> List[TSCallEdge]: + rows = self._run( + "MATCH (s:Symbol)-[r:CALLS]->(t:Symbol) WHERE s._module IN $mods " "RETURN s.signature AS src, t.signature AS tgt, properties(r) AS edge", + mods=self._modules, + ) + return [ + TSCallEdge( + source=r["src"], + target=r["tgt"], + weight=r["edge"].get("weight", 1), + provenance=list(r["edge"].get("provenance", []) or []), + tags=self._edge_tags(r["edge"]), + ) + for r in rows + ] + + @staticmethod + def _edge_tags(edge: Dict[str, Any]) -> Dict[str, str]: + """Invert the flattened CALLS-edge tag props back into the ``ts.*`` tag dict.""" + tags: Dict[str, str] = {} + if edge.get("dispatch") is not None: + tags["ts.dispatch"] = edge["dispatch"] + if edge.get("external") is True: + tags["ts.external"] = "true" + if edge.get("module") is not None: + tags["ts.module"] = edge["module"] + return tags + + def get_call_graph(self) -> nx.DiGraph: + """NetworkX DiGraph of callable signatures (+ phantom external symbols) and CALLS edges.""" + graph = nx.DiGraph() + # Internal callable nodes (with the reconstructed callable, matching the in-memory backend). + for props in self._all_callable_props(): + graph.add_node(props["signature"], callable=self._callable_full(props), external=False) + # Phantom (external) nodes so import-attributed edges don't dangle. + for sig, ext in self.get_external_symbols().items(): + graph.add_node(sig, external=True, module=ext.module, name=ext.name) + # Edges (auto-create any endpoint not added above, matching nx.add_edge semantics). 
+ for r in self._run( + "MATCH (s:Symbol)-[r:CALLS]->(t:Symbol) WHERE s._module IN $mods " "RETURN s.signature AS src, t.signature AS tgt, properties(r) AS edge", + mods=self._modules, + ): + edge = r["edge"] + graph.add_edge( + r["src"], + r["tgt"], + type="CALL_DEP", + weight=edge.get("weight", 1), + provenance=list(edge.get("provenance", []) or []), + tags=self._edge_tags(edge), + ) + return graph + + def get_call_graph_json(self) -> str: + return self.get_application().model_dump_json() + + def _all_callable_props(self) -> List[Dict[str, Any]]: + rows = self._run("MATCH (c:Callable) WHERE c._module IN $mods RETURN properties(c) AS p", mods=self._modules) + return [r["p"] for r in rows] + + def _resolve_signature(self, class_or_sig: str, member: str | None = None) -> str: + """Resolve a ``(class/module, member)`` pair (or a bare signature) to a signature string.""" + if member is None: + return class_or_sig + rows = self._run( + "MATCH (o:Symbol {signature: $owner})-[:HAS_METHOD]->(m:Callable {name: $name}) " "RETURN m.signature AS sig LIMIT 1", + owner=class_or_sig, + name=member, + ) + if rows: + return rows[0]["sig"] + composed = f"{class_or_sig}.{member}" + rows = self._run("MATCH (c:Callable {signature: $sig}) RETURN c.signature AS sig LIMIT 1", sig=composed) + return rows[0]["sig"] if rows else composed + + def get_all_callers(self, target_class_name: str, target_method_declaration: str | None = None) -> Dict: + target = self._resolve_signature(target_class_name, target_method_declaration) + rows = self._run( + "MATCH (src:Symbol)-[r:CALLS]->(t:Symbol {signature: $target}) WHERE src._module IN $mods " "RETURN src.signature AS caller, properties(r) AS edge", + target=target, + mods=self._modules, + ) + caller_details = [{"caller_signature": r["caller"], "edge": self._edge_dict(r["edge"])} for r in rows] + return {"target_method": target, "caller_details": caller_details} + + def get_all_callees(self, source_class_name: str, source_method_declaration: str | 
None = None) -> Dict: + source = self._resolve_signature(source_class_name, source_method_declaration) + rows = self._run( + "MATCH (s:Symbol {signature: $source})-[r:CALLS]->(tgt:Symbol) " "RETURN tgt.signature AS callee, properties(r) AS edge", + source=source, + ) + callee_details = [{"callee_signature": r["callee"], "edge": self._edge_dict(r["edge"])} for r in rows] + return {"source_method": source, "callee_details": callee_details} + + def _edge_dict(self, edge: Dict[str, Any]) -> Dict[str, Any]: + """The call-graph edge metadata dict, matching ``get_call_graph`` node-edge attributes.""" + return { + "type": "CALL_DEP", + "weight": edge.get("weight", 1), + "provenance": list(edge.get("provenance", []) or []), + "tags": self._edge_tags(edge), + } + + def get_class_call_graph(self, qualified_class_name: str, method_signature: str | None = None) -> List[Tuple[str, str]]: + """Call-graph edges reachable (BFS) from a class (or one of its methods).""" + adjacency: Dict[str, List[str]] = {} + for r in self._run( + "MATCH (s:Symbol)-[:CALLS]->(t:Symbol) WHERE s._module IN $mods " "RETURN s.signature AS src, t.signature AS tgt ORDER BY src, tgt", + mods=self._modules, + ): + adjacency.setdefault(r["src"], []).append(r["tgt"]) + + if method_signature is not None: + seeds = [method_signature] + else: + seeds = [p["signature"] for p in self._members(qualified_class_name, "HAS_METHOD", "Callable")] + edges: List[Tuple[str, str]] = [] + seen = set(seeds) + queue = deque(seeds) + while queue: + src = queue.popleft() + for dst in adjacency.get(src, []): + edges.append((src, dst)) + if dst not in seen: + seen.add(dst) + queue.append(dst) + return edges + + def get_class_hierarchy(self) -> nx.DiGraph: + """Inheritance/implementation graph: an edge child → base for every base_class.""" + graph = nx.DiGraph() + rows = self._run( + "MATCH (n:Symbol) WHERE (n:Class OR n:Interface) AND n._module IN $mods " "RETURN n.signature AS sig, n.base_classes AS bases", + mods=self._modules, 
+ ) + for r in rows: + graph.add_node(r["sig"]) + for r in rows: + for base in r["bases"] or []: + graph.add_edge(r["sig"], base) + return graph + + # -----[ call sites ]----- + def get_call_sites(self, qualified_callable_name: str) -> List[TSCallsite]: + return self._callsites_of(qualified_callable_name) + + def get_calling_lines(self, target_signature: str) -> List[int]: + rows = self._run( + "MATCH (cs:CallSite) WHERE cs._module IN $mods AND cs.callee_signature = $sig " "AND cs.start_line >= 0 RETURN DISTINCT cs.start_line AS line ORDER BY line", + mods=self._modules, + sig=target_signature, + ) + return [r["line"] for r in rows] + + def get_call_targets(self, source_signature: str) -> Set[str]: + rows = self._run( + "MATCH (c:Callable {signature: $sig})-[:HAS_CALLSITE]->(cs:CallSite) " "RETURN cs.callee_signature AS cosig, cs.method_name AS mn", + sig=source_signature, + ) + return {(r["cosig"] or r["mn"]) for r in rows} + + # -----[ classes / interfaces / enums / type-aliases ]----- + def get_all_classes(self) -> Dict[str, TSClass]: + rows = self._run("MATCH (c:Class) WHERE c._module IN $mods RETURN properties(c) AS p", mods=self._modules) + return {r["p"]["signature"]: self._class_full(r["p"]) for r in rows} + + def get_class(self, qualified_class_name: str) -> TSClass | None: + rows = self._run( + "MATCH (c:Class {signature: $sig}) WHERE c._module IN $mods RETURN properties(c) AS p", + sig=qualified_class_name, + mods=self._modules, + ) + return self._class_full(rows[0]["p"]) if rows else None + + def get_all_interfaces(self) -> Dict[str, TSInterface]: + rows = self._run("MATCH (i:Interface) WHERE i._module IN $mods RETURN properties(i) AS p", mods=self._modules) + return {r["p"]["signature"]: self._interface_full(r["p"]) for r in rows} + + def get_all_enums(self) -> Dict[str, TSEnum]: + rows = self._run("MATCH (e:Enum) WHERE e._module IN $mods RETURN properties(e) AS p", mods=self._modules) + return {r["p"]["signature"]: R.enum(r["p"]) for r in rows} + + 
def get_enum_members(self, qualified_enum_name: str) -> List[TSEnumMember]:
    """Members of the enum with this signature; an empty list when the enum is not found."""
    rows = self._run("MATCH (e:Enum {signature: $sig}) RETURN properties(e) AS p", sig=qualified_enum_name)
    return R.enum(rows[0]["p"]).members if rows else []

def get_all_type_aliases(self) -> Dict[str, TSTypeAlias]:
    """Every type alias in this application's modules, keyed by signature."""
    rows = self._run("MATCH (t:TypeAlias) WHERE t._module IN $mods RETURN properties(t) AS p", mods=self._modules)
    return {r["p"]["signature"]: R.type_alias(r["p"]) for r in rows}

def get_all_nested_classes(self, qualified_class_name: str) -> List[TSClass]:
    """Classes directly declared (one ``DECLARES`` hop) inside the given symbol, fully rebuilt."""
    return [self._class_full(p) for p in self._children(qualified_class_name, "DECLARES", "Class")]

def get_all_sub_classes(self, qualified_class_name: str) -> Dict[str, TSClass]:
    """Classes in this application that list ``qualified_class_name`` among their ``base_classes``."""
    rows = self._run(
        "MATCH (c:Class) WHERE c._module IN $mods AND $sig IN c.base_classes " "RETURN properties(c) AS p",
        sig=qualified_class_name,
        mods=self._modules,
    )
    return {r["p"]["signature"]: self._class_full(r["p"]) for r in rows}

def get_extended_classes(self, qualified_class_name: str) -> List[str]:
    """Base classes of a class, excluding any entry that also appears in ``implements_types``."""
    rows = self._run(
        "MATCH (c:Class {signature: $sig}) RETURN c.base_classes AS bases, c.implements_types AS impl",
        sig=qualified_class_name,
    )
    if not rows:
        return []
    bases = rows[0]["bases"] or []
    impl = set(rows[0]["impl"] or [])
    return [b for b in bases if b not in impl]

def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]:
    """The class's ``implements_types``; empty when the class is unknown or the property is unset."""
    rows = self._run("MATCH (c:Class {signature: $sig}) RETURN c.implements_types AS impl", sig=qualified_class_name)
    return list(rows[0]["impl"] or []) if rows else []

# -----[ methods / functions / fields ]-----
def get_all_methods_in_application(self) -> Dict[str, Dict[str, TSCallable]]:
    """Methods of every class and interface, as ``{owner signature: {method short name: callable}}``."""
    # Mirror the in-memory `_methods_by_class`: an entry for *every* class and interface
    # (even those with no methods), each keyed by the method's short name.
    out: Dict[str, Dict[str, TSCallable]] = {}
    for r in self._run(
        "MATCH (n:Symbol) WHERE (n:Class OR n:Interface) AND n._module IN $mods " "RETURN n.signature AS sig",
        mods=self._modules,
    ):
        out[r["sig"]] = {}
    for r in self._run(
        "MATCH (owner:Symbol)-[:HAS_METHOD]->(m:Callable) WHERE owner._module IN $mods " "RETURN owner.signature AS owner, properties(m) AS p",
        mods=self._modules,
    ):
        out.setdefault(r["owner"], {})[r["p"]["name"]] = self._callable_full(r["p"])
    return out

def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, TSCallable]:
    """Methods of one class/interface keyed by short name (a later same-named member wins)."""
    return {p["name"]: self._callable_full(p) for p in self._members(qualified_class_name, "HAS_METHOD", "Callable")}

def get_method(self, qualified_class_name: str, qualified_method_name: str) -> TSCallable | None:
    """The owner's method with the given short name, or ``None`` when there is no match.

    When several members share the name, the one declared first (lowest ``start_line``) is
    returned.
    """
    # Several members can share a short name (e.g. a get/set accessor pair). Without an
    # ORDER BY the database may hand back any of them, so order before LIMIT 1.
    rows = self._run(
        "MATCH (o:Symbol {signature: $sig})-[:HAS_METHOD]->(m:Callable {name: $name}) " "RETURN properties(m) AS p ORDER BY m.start_line, m.signature LIMIT 1",
        sig=qualified_class_name,
        name=qualified_method_name,
    )
    return self._callable_full(rows[0]["p"]) if rows else None

def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[str]:
    """Parameter names of the method found by :meth:`get_method`; empty when it is not found."""
    method = self.get_method(qualified_class_name, qualified_method_name)
    return [p.name for p in method.parameters] if method else []

def get_all_constructors(self, qualified_class_name: str) -> Dict[str, TSCallable]:
    """Members whose ``kind`` is ``"constructor"``, keyed by short name."""
    return {p["name"]: self._callable_full(p) for p in self._members(qualified_class_name, "HAS_METHOD", "Callable") if p.get("kind") == "constructor"}

def get_all_functions(self) -> Dict[str, TSCallable]:
    """Callables declared directly by a module or namespace in this application, keyed by signature."""
    rows = self._run(
        "MATCH (parent)-[:DECLARES]->(c:Callable) " "WHERE (parent:Module OR parent:Namespace) AND c._module IN $mods " "RETURN properties(c) AS p",
        mods=self._modules,
    )
    return {r["p"]["signature"]: self._callable_full(r["p"]) for r in rows}

def get_all_fields(self, qualified_class_name: str) -> List[TSClassAttribute]:
    """Attributes of a class in declaration order, each with its decorators attached."""
    return [R.attribute(p, self._attribute_decorators(p.get("id", ""))) for p in self._members(qualified_class_name, "HAS_ATTRIBUTE", "Attribute")]

def get_interface_properties(self, qualified_interface_name: str) -> List[TSClassAttribute]:
    """Properties of an interface in declaration order, each with its decorators attached."""
    return [R.attribute(p, self._attribute_decorators(p.get("id", ""))) for p in self._members(qualified_interface_name, "HAS_ATTRIBUTE", "Attribute")]

# -----[ imports / exports / variables ]-----
def get_imports(self) -> Dict[str, List[TSImport]]:
    """Synthesized imports of every module, keyed by module file key."""
    return {key: self._module_imports(key) for key in self._modules}

def get_all_exports(self) -> Dict[str, List[TSExport]]:
    """Re-exports of every module, keyed by module file key."""
    return {key: self._module_exports(key) for key in self._modules}

def get_all_variables(self) -> Dict[str, List[TSVariableDeclaration]]:
    """Module-level variables of every module, keyed by module file key."""
    return {key: self._module_variables(key) for key in self._modules}

# -----[ decorators ]-----
def get_decorators(self, qualified_callable_name: str) -> List[TSDecorator]:
    """Decorators on the callable with this signature."""
    return self._decorators_of(qualified_callable_name)

def get_class_decorators(self, qualified_class_name: str) -> List[TSDecorator]:
    """Decorators on the class with this signature."""
    return self._decorators_of(qualified_class_name)

def get_methods_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]:
    """Map each requested decorator name to the signatures of callables carrying it.

    Every requested name gets an entry, empty when nothing matches. An empty request returns
    ``{}`` without touching the database.
    """
    result: Dict[str, List[str]] = {d: [] for d in decorators}
    if not decorators:
        # Nothing can match an empty name list; skip the round-trip.
        return result
    rows = self._run(
        "MATCH (c:Callable)-[:DECORATED_BY]->(d:Decorator) " "WHERE c._module IN $mods AND d.name IN $names " "RETURN d.name AS dn, c.signature AS sig",
        mods=self._modules,
        names=decorators,
    )
    for r in rows:
        result[r["dn"]].append(r["sig"])
    return result

def get_classes_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]:
    """Map each requested decorator name to the signatures of classes carrying it.

    Every requested name gets an entry, empty when nothing matches. An empty request returns
    ``{}`` without touching the database.
    """
    result: Dict[str, List[str]] = {d: [] for d in decorators}
    if not decorators:
        # Nothing can match an empty name list; skip the round-trip.
        return result
    rows = self._run(
        "MATCH (c:Class)-[:DECORATED_BY]->(d:Decorator) " "WHERE c._module IN $mods AND d.name IN $names " "RETURN d.name AS dn, c.signature AS sig",
        mods=self._modules,
        names=decorators,
    )
    for r in rows:
        result[r["dn"]].append(r["sig"])
    return result

a/cldk/analysis/typescript/neo4j/reconstruct.py b/cldk/analysis/typescript/neo4j/reconstruct.py new file mode 100644 index 0000000..d168a4b --- /dev/null +++ b/cldk/analysis/typescript/neo4j/reconstruct.py @@ -0,0 +1,370 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Rebuild pydantic TypeScript models from Neo4j node/edge property maps. + +These are *pure* functions: they take the flat property dictionaries that +``codeanalyzer-typescript``'s Neo4j projection wrote (see +``codeanalyzer-ts/src/build/neo4j/project.ts``) and re-hydrate the same +``cldk.models.typescript`` pydantic objects the in-memory backend returns. The +backend (:class:`TSNeo4jBackend`) fetches the related child rows (call sites, +decorators, methods, ...) over Cypher and hands them in here for assembly. + +Lossy fields (the projection flattens or drops them, so a perfect round-trip is +impossible) are reconstructed best-effort and called out inline: + +* ``comments`` collapse to a single synthetic docstring ``TSComment`` (only the + joined docstring text survives the projection). +* ``type_parameters`` keep only their ``name`` (constraints/defaults dropped). 
+* module-level ``imports`` / ``exports`` are aggregated per module-pair into the + ``IMPORTS`` / ``RE_EXPORTS`` edges, so individual bindings are synthesized. +""" + +from __future__ import annotations + +import json +from typing import Any, Dict, List, Mapping + +from cldk.models.typescript import ( + TSCallable, + TSCallableParameter, + TSCallsite, + TSClass, + TSClassAttribute, + TSComment, + TSDecorator, + TSEnum, + TSEnumMember, + TSExternalSymbol, + TSInterface, + TSModule, + TSNamespace, + TSSymbol, + TSTypeAlias, + TSTypeParameter, + TSVariableDeclaration, +) + +Props = Mapping[str, Any] + + +# ---------------------------------------------------------------------------------------------- +# small helpers +# ---------------------------------------------------------------------------------------------- +def _comments(props: Props) -> List[TSComment]: + """Re-hydrate the (lossy) docstring the projection stored as a flat ``docstring`` string.""" + doc = props.get("docstring") + return [TSComment(content=doc, is_docstring=True)] if doc else [] + + +def _type_params(props: Props) -> List[TSTypeParameter]: + """The projection keeps only the parameter *names* (``type_parameter_names``).""" + return [TSTypeParameter(name=n) for n in props.get("type_parameter_names", []) or []] + + +def _json_list(props: Props, key: str) -> List[dict]: + """Decode a ``*_json`` property (``parameters_json`` / ``accessed_symbols_json``).""" + raw = props.get(key) + if not raw: + return [] + try: + data = json.loads(raw) + except (TypeError, ValueError): + return [] + return data if isinstance(data, list) else [] + + +def _entrypoint(props: Props) -> Dict[str, Any]: + """Map the flattened entrypoint props back onto ``TSCallable``'s two entrypoint fields.""" + if "framework" not in props: + return {} + return {"is_entrypoint": True, "entrypoint_framework": props.get("framework")} + + +# ---------------------------------------------------------------------------------------------- +# 
leaf nodes +# ---------------------------------------------------------------------------------------------- +def callsite(props: Props) -> TSCallsite: + return TSCallsite( + method_name=props.get("method_name", ""), + receiver_expr=props.get("receiver_expr"), + receiver_type=props.get("receiver_type"), + argument_types=list(props.get("argument_types", []) or []), + type_arguments=list(props.get("type_arguments", []) or []), + return_type=props.get("return_type"), + callee_signature=props.get("callee_signature"), + is_constructor_call=props.get("is_constructor_call", False), + is_optional_chain=props.get("is_optional_chain", False), + start_line=props.get("start_line", -1), + start_column=props.get("start_column", -1), + end_line=props.get("end_line", -1), + end_column=props.get("end_column", -1), + ) + + +def decorator(node: Props, edge: Props | None = None) -> TSDecorator: + """A decorator from its canonical ``:Decorator`` node + the ``DECORATED_BY`` edge props.""" + edge = edge or {} + kwargs_raw = edge.get("keyword_arguments_json") + keyword_arguments: Dict[str, str] = {} + if kwargs_raw: + try: + keyword_arguments = json.loads(kwargs_raw) + except (TypeError, ValueError): + keyword_arguments = {} + return TSDecorator( + name=node.get("name", ""), + qualified_name=node.get("qualified_name"), + positional_arguments=list(edge.get("positional_arguments", []) or []), + keyword_arguments=keyword_arguments, + start_line=edge.get("start_line", -1), + end_line=edge.get("end_line", -1), + ) + + +def attribute(props: Props, decorators: List[TSDecorator] | None = None) -> TSClassAttribute: + return TSClassAttribute( + name=props.get("name", ""), + type=props.get("type"), + comments=_comments(props), + decorators=decorators or [], + initializer=props.get("initializer"), + accessibility=props.get("accessibility"), + is_static=props.get("is_static", False), + is_readonly=props.get("is_readonly", False), + is_optional=props.get("is_optional", False), + 
is_abstract=props.get("is_abstract", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def variable(props: Props) -> TSVariableDeclaration: + return TSVariableDeclaration( + name=props.get("name", ""), + type=props.get("type"), + initializer=props.get("initializer"), + scope=props.get("scope", "module"), + declaration_kind=props.get("declaration_kind", "unknown"), + is_readonly=props.get("is_readonly", False), + is_exported=props.get("is_exported", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def enum_member(name: str, value: str | None) -> TSEnumMember: + # The projection stores "" for a memberless value (Neo4j arrays cannot hold null). + return TSEnumMember(name=name, value=value if value else None) + + +def external(props: Props) -> TSExternalSymbol: + return TSExternalSymbol( + signature=props.get("signature", ""), + name=props.get("name", ""), + module=props.get("module", ""), + kind=props.get("kind", "unknown"), + ) + + +# ---------------------------------------------------------------------------------------------- +# declaration nodes (children supplied by the backend) +# ---------------------------------------------------------------------------------------------- +def callable_( + props: Props, + *, + decorators: List[TSDecorator] | None = None, + call_sites: List[TSCallsite] | None = None, + inner_callables: Dict[str, TSCallable] | None = None, + inner_classes: Dict[str, TSClass] | None = None, +) -> TSCallable: + def _params() -> List[TSCallableParameter]: + out: List[TSCallableParameter] = [] + for p in _json_list(props, "parameters_json"): + try: + out.append(TSCallableParameter.model_validate(p)) + except Exception: # noqa: BLE001 - tolerate analyzer/SDK schema drift + out.append(TSCallableParameter(name=p.get("name", ""))) + return out + + def _accessed() -> List[TSSymbol]: + out: List[TSSymbol] = [] + for s in _json_list(props, 
"accessed_symbols_json"): + try: + out.append(TSSymbol.model_validate(s)) + except Exception: # noqa: BLE001 + pass + return out + + return TSCallable( + name=props.get("name", ""), + path=props.get("path", ""), + signature=props.get("signature", ""), + comments=_comments(props), + decorators=decorators or [], + parameters=_params(), + type_parameters=_type_params(props), + return_type=props.get("return_type"), + code=props.get("code"), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + code_start_line=props.get("code_start_line", -1), + accessed_symbols=_accessed(), + call_sites=call_sites or [], + inner_callables=inner_callables or {}, + inner_classes=inner_classes or {}, + cyclomatic_complexity=props.get("cyclomatic_complexity", 0), + kind=props.get("kind", "function"), + accessibility=props.get("accessibility"), + accessor_kind=props.get("accessor_kind"), + is_static=props.get("is_static", False), + is_abstract=props.get("is_abstract", False), + is_async=props.get("is_async", False), + is_generator=props.get("is_generator", False), + is_optional=props.get("is_optional", False), + is_readonly=props.get("is_readonly", False), + is_exported=props.get("is_exported", False), + is_ambient=props.get("is_ambient", False), + is_implicit=props.get("is_implicit", False), + **_entrypoint(props), + ) + + +def class_( + props: Props, + *, + decorators: List[TSDecorator] | None = None, + methods: Dict[str, TSCallable] | None = None, + attributes: Dict[str, TSClassAttribute] | None = None, + inner_classes: Dict[str, TSClass] | None = None, +) -> TSClass: + return TSClass( + name=props.get("name", ""), + signature=props.get("signature", ""), + comments=_comments(props), + code=props.get("code"), + decorators=decorators or [], + base_classes=list(props.get("base_classes", []) or []), + implements_types=list(props.get("implements_types", []) or []), + type_parameters=_type_params(props), + methods=methods or {}, + attributes=attributes or {}, + 
inner_classes=inner_classes or {}, + is_abstract=props.get("is_abstract", False), + is_exported=props.get("is_exported", False), + is_ambient=props.get("is_ambient", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def interface( + props: Props, + *, + methods: Dict[str, TSCallable] | None = None, + properties: Dict[str, TSClassAttribute] | None = None, +) -> TSInterface: + return TSInterface( + name=props.get("name", ""), + signature=props.get("signature", ""), + comments=_comments(props), + code=props.get("code"), + base_classes=list(props.get("base_classes", []) or []), + type_parameters=_type_params(props), + methods=methods or {}, + properties=properties or {}, + call_signatures=list(props.get("call_signatures", []) or []), + index_signatures=list(props.get("index_signatures", []) or []), + is_exported=props.get("is_exported", False), + is_ambient=props.get("is_ambient", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def enum(props: Props) -> TSEnum: + names = props.get("member_names", []) or [] + values = props.get("member_values", []) or [] + members = [enum_member(n, values[i] if i < len(values) else None) for i, n in enumerate(names)] + return TSEnum( + name=props.get("name", ""), + signature=props.get("signature", ""), + comments=_comments(props), + code=props.get("code"), + members=members, + is_const=props.get("is_const", False), + is_exported=props.get("is_exported", False), + is_ambient=props.get("is_ambient", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def type_alias(props: Props) -> TSTypeAlias: + return TSTypeAlias( + name=props.get("name", ""), + signature=props.get("signature", ""), + comments=_comments(props), + code=props.get("code"), + aliased_type=props.get("aliased_type", ""), + type_parameters=_type_params(props), + is_exported=props.get("is_exported", False), + 
is_ambient=props.get("is_ambient", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def namespace( + props: Props, + *, + classes: Dict[str, TSClass] | None = None, + interfaces: Dict[str, TSInterface] | None = None, + enums: Dict[str, TSEnum] | None = None, + type_aliases: Dict[str, TSTypeAlias] | None = None, + functions: Dict[str, TSCallable] | None = None, + namespaces: Dict[str, TSNamespace] | None = None, + variables: List[TSVariableDeclaration] | None = None, +) -> TSNamespace: + return TSNamespace( + name=props.get("name", ""), + signature=props.get("signature", ""), + comments=_comments(props), + classes=classes or {}, + interfaces=interfaces or {}, + enums=enums or {}, + type_aliases=type_aliases or {}, + functions=functions or {}, + variables=variables or [], + namespaces=namespaces or {}, + is_exported=props.get("is_exported", False), + is_ambient=props.get("is_ambient", False), + start_line=props.get("start_line", -1), + end_line=props.get("end_line", -1), + ) + + +def module(props: Props, **children: Any) -> TSModule: + return TSModule( + file_path=props.get("file_key", props.get("file_path", "")), + module_name=props.get("module_name", ""), + is_tsx=props.get("is_tsx", False), + is_declaration_file=props.get("is_declaration_file", False), + content_hash=props.get("content_hash"), + last_modified=props.get("last_modified"), + file_size=props.get("file_size"), + **children, + ) diff --git a/cldk/analysis/typescript/typescript_analysis.py b/cldk/analysis/typescript/typescript_analysis.py new file mode 100644 index 0000000..cba14c9 --- /dev/null +++ b/cldk/analysis/typescript/typescript_analysis.py @@ -0,0 +1,293 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""TypeScript analysis facade. + +Thin, read-only query layer over the canonical ``TSApplication`` produced by the +codeanalyzer-typescript backend. Mirrors the method vocabulary of ``JavaAnalysis`` / +``PythonAnalysis`` (there is no shared base class — the facades match by convention) and, like +those, delegates all indexing and query work to its backend (:class:`TSCodeanalyzer`). +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Dict, List, Set, Tuple + +import networkx as nx + +from cldk.analysis.typescript.backend import TSAnalysisBackend +from cldk.analysis.typescript.codeanalyzer import TSCodeanalyzer +from cldk.analysis.typescript.neo4j import Neo4jConnectionConfig, TSNeo4jBackend +from cldk.models.typescript import ( + TSApplication, + TSCallable, + TSCallsite, + TSClass, + TSClassAttribute, + TSDecorator, + TSEnum, + TSEnumMember, + TSExport, + TSExternalSymbol, + TSImport, + TSInterface, + TSModule, + TSTypeAlias, + TSVariableDeclaration, +) + + +class TypeScriptAnalysis: + """Analysis facade for TypeScript projects. + + Delegates every query to a backend. Two interchangeable backends exist, both exposing the + same method surface: + + * :class:`TSCodeanalyzer` (default) — walks the in-memory pydantic ``TSApplication`` / a + NetworkX call graph built from ``analysis.json``; + * :class:`TSNeo4jBackend` — answers the *same* ``get_*`` queries with Cypher over the graph + ``codeanalyzer-typescript`` emits with ``--emit neo4j``. 
Selected by passing + ``neo4j_config``. + """ + + def __init__( + self, + project_dir: str | Path | None, + analysis_level: str, + analysis_backend_path: str | None, + analysis_json_path: str | Path | None, + target_files: List[str] | None, + eager_analysis: bool, + neo4j_config: Neo4jConnectionConfig | None = None, + ) -> None: + self.project_dir = project_dir + self.analysis_level = analysis_level + self.analysis_backend_path = analysis_backend_path + self.analysis_json_path = analysis_json_path + self.target_files = target_files + self.eager_analysis = eager_analysis + self.neo4j_config = neo4j_config + self.backend: TSAnalysisBackend + if neo4j_config is not None: + self.backend = TSNeo4jBackend( + project_dir=project_dir, + analysis_backend_path=analysis_backend_path, + analysis_level=analysis_level, + eager_analysis=eager_analysis, + target_files=target_files, + neo4j_uri=neo4j_config.uri, + neo4j_username=neo4j_config.username, + neo4j_password=neo4j_config.password, + neo4j_database=neo4j_config.database, + application_name=neo4j_config.application_name, + build_db=neo4j_config.build_db, + ) + else: + self.backend = TSCodeanalyzer( + project_dir=project_dir, + analysis_backend_path=analysis_backend_path, + analysis_json_path=analysis_json_path, + analysis_level=analysis_level, + eager_analysis=eager_analysis, + target_files=target_files, + ) + self.application: TSApplication = self.backend.get_application() + + # -----[ Tier A: lifecycle / whole-program ]----- + def get_application_view(self) -> TSApplication: + return self.backend.get_application() + + def get_symbol_table(self) -> Dict[str, TSModule]: + return self.backend.get_symbol_table() + + def get_modules(self) -> List[TSModule]: + return self.backend.get_modules() + + def get_call_graph(self) -> nx.DiGraph: + """NetworkX DiGraph of callable signatures (and phantom external symbols) connected by the + identity-only call edges.""" + return self.backend.get_call_graph() + + def 
get_external_symbols(self) -> Dict[str, TSExternalSymbol]: + """The phantom (external) call targets — imported/required library members the call graph + points at (e.g. ``node:fs.readFileSync``, ``js-yaml.load``). Useful for source→sink + reachability.""" + return self.backend.get_external_symbols() + + def get_call_graph_json(self) -> str: + return self.backend.get_call_graph_json() + + def get_callers(self, target_class_name: str, target_method_declaration: str | None = None) -> Dict: + """Callers of a method, with the connecting call-graph edge metadata (``provenance`` / + ``tags``). Pass a bare signature as the first argument for module-level functions or + external (phantom) targets.""" + return self.backend.get_all_callers(target_class_name, target_method_declaration) + + def get_callees(self, source_class_name: str, source_method_declaration: str | None = None) -> Dict: + """Callees of a method, with the connecting call-graph edge metadata.""" + return self.backend.get_all_callees(source_class_name, source_method_declaration) + + def get_class_call_graph(self, qualified_class_name: str, method_signature: str | None = None) -> List[Tuple[str, str]]: + """Call-graph edges reachable from a class (or one of its methods).""" + return self.backend.get_class_call_graph(qualified_class_name, method_signature) + + def get_class_hierarchy(self) -> nx.DiGraph: + """Inheritance/implementation graph: an edge child → base for every base_class.""" + return self.backend.get_class_hierarchy() + + # -----[ call sites ]----- + def get_call_sites(self, qualified_callable_name: str) -> List[TSCallsite]: + """The rich, syntactic call sites *inside* a callable (receiver/argument types, resolved + ``callee_signature``, source position).""" + return self.backend.get_call_sites(qualified_callable_name) + + def get_calling_lines(self, target_signature: str) -> List[int]: + """Sorted source lines anywhere in the project where ``target_signature`` is invoked.""" + return 
self.backend.get_calling_lines(target_signature) + + def get_call_targets(self, source_signature: str) -> Set[str]: + """The call targets invoked from a callable, derived from its call sites.""" + return self.backend.get_call_targets(source_signature) + + # -----[ entrypoints (not yet supported) ]----- + def get_entry_point_methods(self) -> Dict[str, Dict[str, TSCallable]]: + """Return methods identified as application entry points. + + Not yet supported: the codeanalyzer-typescript backend's entrypoint detection is a stub + placeholder — ``TSApplication.entrypoints`` and ``TSCallable.is_entrypoint`` are never + populated — so this method exists for API parity with :class:`PythonAnalysis` / + :class:`JavaAnalysis` but raises. + + Raises: + NotImplementedError: Always. + """ + raise NotImplementedError("Entrypoint detection is not implemented in the codeanalyzer-typescript backend yet.") + + def get_service_entry_point_methods(self, **kwargs) -> Dict[str, Dict[str, TSCallable]]: + """Return methods that serve as service entry points (e.g. Express/NestJS routes). + + Not yet supported; see :meth:`get_entry_point_methods`. + + Raises: + NotImplementedError: Always. 
+ """ + raise NotImplementedError("Entrypoint detection is not implemented in the codeanalyzer-typescript backend yet.") + + # -----[ Tier B: navigation ]----- + def get_classes(self) -> Dict[str, TSClass]: + return self.backend.get_all_classes() + + def get_class(self, qualified_class_name: str) -> TSClass | None: + return self.backend.get_class(qualified_class_name) + + def get_classes_by_criteria(self, inclusions: List[str] | None = None, exclusions: List[str] | None = None) -> Dict[str, TSClass]: + inclusions = inclusions or [] + exclusions = exclusions or [] + result: Dict[str, TSClass] = {} + for sig, cls in self.backend.get_all_classes().items(): + selected = any(inc in sig for inc in inclusions) + if any(exc in sig for exc in exclusions): + selected = False + if selected: + result[sig] = cls + return result + + def get_interfaces(self) -> Dict[str, TSInterface]: + return self.backend.get_all_interfaces() + + def get_enums(self) -> Dict[str, TSEnum]: + return self.backend.get_all_enums() + + def get_enum_members(self, qualified_enum_name: str) -> List[TSEnumMember]: + return self.backend.get_enum_members(qualified_enum_name) + + def get_type_aliases(self) -> Dict[str, TSTypeAlias]: + return self.backend.get_all_type_aliases() + + def get_functions(self) -> Dict[str, TSCallable]: + """Top-level (module/namespace) functions.""" + return self.backend.get_all_functions() + + def get_methods(self) -> Dict[str, Dict[str, TSCallable]]: + """All methods grouped by class/interface signature.""" + return self.backend.get_all_methods_in_application() + + def get_methods_in_class(self, qualified_class_name: str) -> Dict[str, TSCallable]: + return self.backend.get_all_methods_in_class(qualified_class_name) + + def get_method(self, qualified_class_name: str, qualified_method_name: str) -> TSCallable | None: + return self.backend.get_method(qualified_class_name, qualified_method_name) + + def get_method_parameters(self, qualified_class_name: str, qualified_method_name: 
str) -> List[str]: + return self.backend.get_method_parameters(qualified_class_name, qualified_method_name) + + def get_constructors(self, qualified_class_name: str) -> Dict[str, TSCallable]: + return self.backend.get_all_constructors(qualified_class_name) + + def get_fields(self, qualified_class_name: str) -> List[TSClassAttribute]: + return self.backend.get_all_fields(qualified_class_name) + + def get_interface_properties(self, qualified_interface_name: str) -> List[TSClassAttribute]: + return self.backend.get_interface_properties(qualified_interface_name) + + def get_imports(self) -> Dict[str, List[TSImport]]: + return self.backend.get_imports() + + def get_exports(self) -> Dict[str, List[TSExport]]: + return self.backend.get_all_exports() + + def get_variables(self) -> Dict[str, List[TSVariableDeclaration]]: + """Module-level variable declarations per file.""" + return self.backend.get_all_variables() + + def get_typescript_file(self, qualified_name: str) -> str | None: + """File path declaring the class/interface/enum/callable with the given signature.""" + return self.backend.get_typescript_file(qualified_name) + + def get_typescript_module(self, file_path: str) -> TSModule | None: + return self.backend.get_typescript_module(file_path) + + def get_nested_classes(self, qualified_class_name: str) -> List[TSClass]: + return self.backend.get_all_nested_classes(qualified_class_name) + + def get_sub_classes(self, qualified_class_name: str) -> Dict[str, TSClass]: + return self.backend.get_all_sub_classes(qualified_class_name) + + def get_extended_classes(self, qualified_class_name: str) -> List[str]: + """The base types a class extends (base_classes minus the implemented interfaces).""" + return self.backend.get_extended_classes(qualified_class_name) + + def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]: + return self.backend.get_implemented_interfaces(qualified_class_name) + + # -----[ decorators ]----- + def get_decorators(self, 
qualified_callable_name: str) -> List[TSDecorator]: + """Structured decorators (with arguments) applied to a callable.""" + return self.backend.get_decorators(qualified_callable_name) + + def get_class_decorators(self, qualified_class_name: str) -> List[TSDecorator]: + """Structured decorators (with arguments) applied to a class.""" + return self.backend.get_class_decorators(qualified_class_name) + + def get_methods_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]: + """Map each requested decorator name to the signatures of callables carrying it. TS + decorators are captured structurally, so this is populatable at level 1.""" + return self.backend.get_methods_with_decorators(decorators) + + def get_classes_with_decorators(self, decorators: List[str]) -> Dict[str, List[str]]: + """Map each requested decorator name to the signatures of classes carrying it.""" + return self.backend.get_classes_with_decorators(decorators) diff --git a/cldk/core.py b/cldk/core.py index 2dc5677..7ceb9d7 100644 --- a/cldk/core.py +++ b/cldk/core.py @@ -49,6 +49,8 @@ from cldk.analysis.java import JavaAnalysis from cldk.analysis.commons.treesitter import TreesitterJava from cldk.analysis.python.python_analysis import PythonAnalysis +from cldk.analysis.typescript import TypeScriptAnalysis +from cldk.analysis.typescript.neo4j import Neo4jConnectionConfig from cldk.utils.exceptions import CldkInitializationException from cldk.utils.sanitization.java import TreesitterSanitizer @@ -108,7 +110,8 @@ def analysis( cache_dir: str | Path | None = None, use_codeql: bool = True, use_ray: bool = False, - ) -> JavaAnalysis | PythonAnalysis | CAnalysis: + neo4j_config: "Neo4jConnectionConfig | None" = None, + ) -> JavaAnalysis | PythonAnalysis | CAnalysis | TypeScriptAnalysis: """Initialize and return a language-specific analysis facade. 
This factory method creates an appropriate analysis object based on the @@ -219,8 +222,7 @@ def analysis( raise CldkInitializationException("source_code mode is not supported for Python; please pass project_path.") if analysis_backend_path is not None: raise CldkInitializationException( - "analysis_backend_path is Java-only (it locates codeanalyzer-*.jar). " - "For Python, use cache_dir for the backend's virtualenv/CodeQL cache." + "analysis_backend_path is Java-only (it locates codeanalyzer-*.jar). " "For Python, use cache_dir for the backend's virtualenv/CodeQL cache." ) return PythonAnalysis( project_dir=project_path, @@ -234,6 +236,23 @@ def analysis( ) elif self.language == "c": return CAnalysis(project_dir=project_path) + elif self.language == "typescript": + if source_code is not None: + raise CldkInitializationException("source_code mode is not supported for TypeScript; please pass project_path.") + if cache_dir is not None or use_ray: + raise CldkInitializationException( + "cache_dir and use_ray are Python-only. For TypeScript, use analysis_backend_path " + "to locate the codeanalyzer-typescript binary (or set $CODEANALYZER_TS_BIN)." + ) + return TypeScriptAnalysis( + project_dir=project_path, + analysis_level=analysis_level, + analysis_backend_path=analysis_backend_path, + analysis_json_path=analysis_json_path, + target_files=target_files, + eager_analysis=eager, + neo4j_config=neo4j_config, + ) else: raise NotImplementedError(f"Analysis support for {self.language} is not implemented yet.") diff --git a/cldk/models/typescript/__init__.py b/cldk/models/typescript/__init__.py new file mode 100644 index 0000000..f136327 --- /dev/null +++ b/cldk/models/typescript/__init__.py @@ -0,0 +1,69 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""TypeScript model package — identity-only schema mirror of codeanalyzer-ts/src/schema.ts.""" + +from .models import ( + TSApplication, + TSCallEdge, + TSCallable, + TSCallableParameter, + TSCallsite, + TSClass, + TSClassAttribute, + TSComment, + TSDecorator, + TSEntrypoint, + TSEnum, + TSEnumMember, + TSExport, + TSExternalSymbol, + TSImport, + TSInterface, + TSModule, + TSNamespace, + TSOverloadSignature, + TSSymbol, + TSTypeAlias, + TSTypeParameter, + TSVariableDeclaration, +) + +__all__ = [ + "TSApplication", + "TSCallEdge", + "TSCallable", + "TSCallableParameter", + "TSCallsite", + "TSClass", + "TSClassAttribute", + "TSComment", + "TSDecorator", + "TSEntrypoint", + "TSEnum", + "TSEnumMember", + "TSExport", + "TSExternalSymbol", + "TSImport", + "TSInterface", + "TSModule", + "TSNamespace", + "TSOverloadSignature", + "TSSymbol", + "TSTypeAlias", + "TSTypeParameter", + "TSVariableDeclaration", +] diff --git a/cldk/models/typescript/models.py b/cldk/models/typescript/models.py new file mode 100644 index 0000000..25ac984 --- /dev/null +++ b/cldk/models/typescript/models.py @@ -0,0 +1,445 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""TypeScript schema models. + +Pydantic mirror of the analyzer-side schema in ``codeanalyzer-ts/src/schema.ts``. These models +are BOTH the SDK binding and the validation target the analyzer's ``analysis.json`` is checked +against — they must be co-evolved with ``schema.ts`` field-for-field. + +The invariant spine matches the identity-only Python model (``PyApplication``/``PyCallEdge``): +``TSApplication { symbol_table: Dict[path, TSModule], call_graph: List[TSCallEdge] }`` with +edges whose ``source``/``target`` are bare ``TSCallable.signature`` strings. Everything else is +TypeScript-native (interface / type-alias / enum / namespace node kinds; generics; modifiers). + +``extra="forbid"`` is intentional: it makes any drift between the analyzer's JSON and these +models fail loudly during development rather than silently dropping fields. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, ConfigDict +from typing_extensions import Literal + + +class _Base(BaseModel): + model_config = ConfigDict(extra="forbid") + + +# ---------------------------------------------------------------------------------------------- +# Leaf models +# ---------------------------------------------------------------------------------------------- + + +class TSImport(_Base): + """A TypeScript import binding (one entry per imported name).""" + + module: str + name: str + alias: Optional[str] = None + is_type_only: bool = False + import_kind: str = "named" # named | default | namespace | side_effect + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSExport(_Base): + """A TypeScript export / re-export binding.""" + + module: Optional[str] = None + name: str + alias: Optional[str] = None + is_type_only: bool = False + export_kind: str = "named" # named | default | namespace | re_export + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSComment(_Base): + """A comment or JSDoc block.""" + + content: str + is_docstring: bool = False + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSSymbol(_Base): + """A symbol referenced or declared in code.""" + + name: str + scope: str + kind: str + type: Optional[str] = None + qualified_name: Optional[str] = None + is_builtin: bool = False + lineno: int = -1 + col_offset: int = -1 + + +class TSVariableDeclaration(_Base): + """A variable / const / let declaration.""" + + name: str + type: Optional[str] = None + initializer: Optional[str] = None + value: Optional[Any] = None + scope: str = "module" # module | namespace | class | function | block + declaration_kind: str = "unknown" # const | let | var | using | unknown + is_readonly: bool = False + is_exported: bool 
= False + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSDecorator(_Base): + """A decorator applied to a class / member / parameter (structured, with arguments).""" + + name: str + qualified_name: Optional[str] = None + positional_arguments: List[str] = [] + keyword_arguments: Dict[str, str] = {} + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSTypeParameter(_Base): + """A generic type parameter, e.g. ``T extends Base = Default``.""" + + name: str + constraint: Optional[str] = None + default: Optional[str] = None + + +class TSCallableParameter(_Base): + """A function / method parameter.""" + + name: str + type: Optional[str] = None + default_value: Optional[str] = None + is_optional: bool = False + is_rest: bool = False + is_readonly: bool = False + accessibility: Optional[str] = None + decorators: List[TSDecorator] = [] + start_line: int = -1 + end_line: int = -1 + start_column: int = -1 + end_column: int = -1 + + +class TSCallsite(_Base): + """Rich per-call metadata, attached to the caller. 
``callee_signature`` is backfilled by the + resolver call graph.""" + + method_name: str + receiver_expr: Optional[str] = None + receiver_type: Optional[str] = None + argument_types: List[str] = [] + type_arguments: List[str] = [] + return_type: Optional[str] = None + callee_signature: Optional[str] = None + is_constructor_call: bool = False + is_optional_chain: bool = False + start_line: int = -1 + start_column: int = -1 + end_line: int = -1 + end_column: int = -1 + + +class TSOverloadSignature(_Base): + """An overload signature attached to the implementation callable.""" + + parameters: List[TSCallableParameter] = [] + return_type: Optional[str] = None + type_parameters: List[TSTypeParameter] = [] + start_line: int = -1 + end_line: int = -1 + + +class TSClassAttribute(_Base): + """A class property / field (also covers constructor parameter-properties).""" + + name: str + type: Optional[str] = None + comments: List[TSComment] = [] + decorators: List[TSDecorator] = [] + initializer: Optional[str] = None + accessibility: Optional[str] = None + is_static: bool = False + is_readonly: bool = False + is_optional: bool = False + is_abstract: bool = False + start_line: int = -1 + end_line: int = -1 + + +# ---------------------------------------------------------------------------------------------- +# Callable +# ---------------------------------------------------------------------------------------------- + + +class TSCallable(_Base): + """A function / method / constructor / accessor / arrow function.""" + + name: str + path: str + signature: str # e.g. 
src/user.UserService.getUser — the edge id + comments: List[TSComment] = [] + decorators: List[TSDecorator] = [] + parameters: List[TSCallableParameter] = [] + type_parameters: List[TSTypeParameter] = [] + return_type: Optional[str] = None + code: Optional[str] = None + start_line: int = -1 + end_line: int = -1 + code_start_line: int = -1 + accessed_symbols: List[TSSymbol] = [] + call_sites: List[TSCallsite] = [] + inner_callables: Dict[str, "TSCallable"] = {} + inner_classes: Dict[str, "TSClass"] = {} + local_variables: List[TSVariableDeclaration] = [] + cyclomatic_complexity: int = 0 + is_entrypoint: bool = False + entrypoint_framework: Optional[str] = None + # TypeScript-native typed fields + kind: str = "function" # function | method | constructor | getter | setter | arrow | function_expression + accessibility: Optional[str] = None + is_static: bool = False + is_abstract: bool = False + is_async: bool = False + is_generator: bool = False + is_optional: bool = False + is_readonly: bool = False + is_exported: bool = False + is_ambient: bool = False + is_implicit: bool = False + accessor_kind: Optional[str] = None + overload_signatures: List[TSOverloadSignature] = [] + + def __hash__(self) -> int: + return hash(self.signature) + + +# ---------------------------------------------------------------------------------------------- +# Type-kind node models +# ---------------------------------------------------------------------------------------------- + + +class TSClass(_Base): + """A class declaration.""" + + name: str + signature: str + comments: List[TSComment] = [] + code: Optional[str] = None + decorators: List[TSDecorator] = [] + base_classes: List[str] = [] # spine: union of extends + implements (signature strings) + implements_types: List[str] = [] # typed split: just the implemented interfaces + type_parameters: List[TSTypeParameter] = [] + methods: Dict[str, TSCallable] = {} + attributes: Dict[str, TSClassAttribute] = {} + inner_classes: Dict[str, "TSClass"] 
= {} + is_abstract: bool = False + is_exported: bool = False + is_ambient: bool = False + start_line: int = -1 + end_line: int = -1 + + def __hash__(self) -> int: + return hash(self.signature) + + +class TSInterface(_Base): + """An interface declaration (TS node kind).""" + + name: str + signature: str + comments: List[TSComment] = [] + code: Optional[str] = None + base_classes: List[str] = [] # extended interfaces (signature strings) + type_parameters: List[TSTypeParameter] = [] + methods: Dict[str, TSCallable] = {} + properties: Dict[str, TSClassAttribute] = {} + call_signatures: List[str] = [] + index_signatures: List[str] = [] + is_exported: bool = False + is_ambient: bool = False + start_line: int = -1 + end_line: int = -1 + + def __hash__(self) -> int: + return hash(self.signature) + + +class TSEnumMember(_Base): + """A member of an enum.""" + + name: str + value: Optional[str] = None + start_line: int = -1 + end_line: int = -1 + + +class TSEnum(_Base): + """An enum declaration (TS node kind).""" + + name: str + signature: str + comments: List[TSComment] = [] + code: Optional[str] = None + members: List[TSEnumMember] = [] + is_const: bool = False + is_exported: bool = False + is_ambient: bool = False + start_line: int = -1 + end_line: int = -1 + + def __hash__(self) -> int: + return hash(self.signature) + + +class TSTypeAlias(_Base): + """A type-alias declaration (TS node kind).""" + + name: str + signature: str + comments: List[TSComment] = [] + code: Optional[str] = None + aliased_type: str = "" + type_parameters: List[TSTypeParameter] = [] + is_exported: bool = False + is_ambient: bool = False + start_line: int = -1 + end_line: int = -1 + + def __hash__(self) -> int: + return hash(self.signature) + + +class TSNamespace(_Base): + """A namespace / module block (TS node kind) — recursive container.""" + + name: str + signature: str + comments: List[TSComment] = [] + classes: Dict[str, TSClass] = {} + interfaces: Dict[str, TSInterface] = {} + enums: Dict[str, 
TSEnum] = {} + type_aliases: Dict[str, TSTypeAlias] = {} + functions: Dict[str, TSCallable] = {} + variables: List[TSVariableDeclaration] = [] + namespaces: Dict[str, "TSNamespace"] = {} + is_exported: bool = False + is_ambient: bool = False + start_line: int = -1 + end_line: int = -1 + + def __hash__(self) -> int: + return hash(self.signature) + + +# ---------------------------------------------------------------------------------------------- +# Module / edge / entrypoint / application +# ---------------------------------------------------------------------------------------------- + + +class TSModule(_Base): + """A compilation unit (a .ts/.tsx file).""" + + file_path: str + module_name: str + imports: List[TSImport] = [] + exports: List[TSExport] = [] + comments: List[TSComment] = [] + classes: Dict[str, TSClass] = {} + interfaces: Dict[str, TSInterface] = {} + enums: Dict[str, TSEnum] = {} + type_aliases: Dict[str, TSTypeAlias] = {} + functions: Dict[str, TSCallable] = {} + namespaces: Dict[str, TSNamespace] = {} + variables: List[TSVariableDeclaration] = [] + is_tsx: bool = False + is_declaration_file: bool = False + content_hash: Optional[str] = None + last_modified: Optional[float] = None + file_size: Optional[int] = None + + +class TSCallEdge(_Base): + """Identity-only call-graph edge. ``source``/``target`` are ``TSCallable.signature`` strings.""" + + source: str + target: str + type: Literal["CALL_DEP"] = "CALL_DEP" + weight: int = 1 + provenance: List[str] = [] + tags: Dict[str, str] = {} + + +class TSExternalSymbol(_Base): + """A WALA-style phantom node: a synthetic stub for a call target OUTSIDE the project (an + imported/required library member). An edge's ``target`` byte-matches either a real + ``TSCallable.signature`` or a ``TSExternalSymbol.signature``, so the call graph stays + dangling-free while still recording external (e.g. sink) calls.""" + + signature: str # e.g. 
"node:fs.readFileSync", "express.Router.get" + name: str + module: str + kind: str = "unknown" + is_external: bool = True + + +class TSEntrypoint(_Base): + """A framework entrypoint (populated by level-2 finders; empty for level 1).""" + + signature: str + framework: str + detection_source: str + route_path: Optional[str] = None + http_methods: List[str] = [] + source_file: Optional[str] = None + tags: Dict[str, str] = {} + + +class TSApplication(_Base): + """The root analysis object emitted as analysis.json.""" + + symbol_table: Dict[str, TSModule] + call_graph: List[TSCallEdge] = [] + external_symbols: Dict[str, TSExternalSymbol] = {} + entrypoints: Dict[str, List[TSEntrypoint]] = {} + + +# Resolve forward references for the mutually-recursive models. +TSCallable.model_rebuild() +TSClass.model_rebuild() +TSInterface.model_rebuild() +TSNamespace.model_rebuild() +TSModule.model_rebuild() +TSApplication.model_rebuild() diff --git a/pyproject.toml b/pyproject.toml index a1babea..2b2fd42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,13 @@ dependencies = [ "clang==17.0.6", "libclang==17.0.6", "codeanalyzer-python==0.1.14", + "codeanalyzer-typescript==0.2.1", +] + +[project.optional-dependencies] +# The Neo4j-backed TypeScript analysis backend (cldk.analysis.typescript.neo4j). 
+neo4j = [ + "neo4j>=5.14,<7", ] [project.urls] @@ -85,6 +92,7 @@ include = [ [tool.backend-versions] codeanalyzer-java = "2.3.8" codeanalyzer-python = "0.1.14" +codeanalyzer-typescript = "0.2.1" ######################################## # Tool configurations @@ -138,3 +146,5 @@ sample-c-application = "tests/resources/c/application/" sample-application = "tests/resources/java/application/" sample-application-analysis-json = "tests/resources/java/analysis_json/" codeanalyzer-jar-path = "cldk/analysis/java/codeanalyzer/jar/" +sample-typescript-application = "tests/resources/typescript/application/" +sample-typescript-analysis-json = "tests/resources/typescript/analysis_json/" diff --git a/tests/analysis/java/test_java_backend_contract.py b/tests/analysis/java/test_java_backend_contract.py new file mode 100644 index 0000000..45cff1c --- /dev/null +++ b/tests/analysis/java/test_java_backend_contract.py @@ -0,0 +1,47 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +"""The Java analysis backend contract (introspection only — no analyzer run needed).""" + +import re +from pathlib import Path + +import pytest + +from cldk.analysis.java.backend import JavaAnalysisBackend +from cldk.analysis.java.codeanalyzer.codeanalyzer import JCodeanalyzer + + +def test_backend_subclasses_contract(): + assert issubclass(JCodeanalyzer, JavaAnalysisBackend) + + +def test_contract_is_abstract(): + with pytest.raises(TypeError): + JavaAnalysisBackend() + + +def test_backend_fully_implements_contract(): + assert JCodeanalyzer.__abstractmethods__ == frozenset() + + +def test_contract_covers_every_method_the_facade_delegates(): + """Every ``self.backend.X`` the JavaAnalysis facade calls must be on the contract.""" + facade_src = (Path(__file__).resolve().parents[3] / "cldk" / "analysis" / "java" / "java_analysis.py").read_text() + delegated = set(re.findall(r"self\.backend\.([a-zA-Z_]+)", facade_src)) + contract = {n for n in dir(JavaAnalysisBackend) if not n.startswith("__")} + missing = delegated - contract + assert not missing, f"facade delegates to backend methods absent from the contract: {sorted(missing)}" diff --git a/tests/analysis/python/test_python_analysis.py b/tests/analysis/python/test_python_analysis.py index f2003f7..c4dc933 100644 --- a/tests/analysis/python/test_python_analysis.py +++ b/tests/analysis/python/test_python_analysis.py @@ -49,9 +49,7 @@ class FakeBackend: def __init__(self, **kwargs): captured.update(kwargs) - monkeypatch.setattr( - "cldk.analysis.python.python_analysis.PyCodeanalyzer", FakeBackend - ) + monkeypatch.setattr("cldk.analysis.python.python_analysis.PyCodeanalyzer", FakeBackend) CLDK(language="python").analysis(project_path=tmp_path, use_codeql=False) assert captured["use_codeql"] is False @@ -62,6 +60,30 @@ def __init__(self, **kwargs): assert captured["use_codeql"] is True +def 
test_use_ray_forwarded_through_facade(monkeypatch, tmp_path): + """Regression: CLDK.analysis() must forward use_ray down to the backend. + + use_ray is a Python-only option lifted all the way up to the public API + (CLDK.analysis → PythonAnalysis → PyCodeanalyzer → AnalysisOptions.using_ray); + the façade must not silently drop it (mirrors the use_codeql guard). + """ + captured = {} + + class FakeBackend: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr("cldk.analysis.python.python_analysis.PyCodeanalyzer", FakeBackend) + + CLDK(language="python").analysis(project_path=tmp_path, use_ray=True) + assert captured["use_ray"] is True + + # Off by default; the façade must forward that faithfully too. + captured.clear() + CLDK(language="python").analysis(project_path=tmp_path) + assert captured["use_ray"] is False + + def test_cache_dir_forwarded_through_facade(monkeypatch, tmp_path): """cache_dir must reach the backend as cache_dir (not analysis_backend_path).""" captured = {} @@ -70,9 +92,7 @@ class FakeBackend: def __init__(self, **kwargs): captured.update(kwargs) - monkeypatch.setattr( - "cldk.analysis.python.python_analysis.PyCodeanalyzer", FakeBackend - ) + monkeypatch.setattr("cldk.analysis.python.python_analysis.PyCodeanalyzer", FakeBackend) cache = tmp_path / "mycache" CLDK(language="python").analysis(project_path=tmp_path, cache_dir=cache) @@ -83,6 +103,4 @@ def __init__(self, **kwargs): def test_python_rejects_java_only_analysis_backend_path(tmp_path): """analysis_backend_path is Java-only; Python mode must reject it.""" with pytest.raises(CldkInitializationException, match="Java-only"): - CLDK(language="python").analysis( - project_path=tmp_path, analysis_backend_path="/some/jar/dir" - ) + CLDK(language="python").analysis(project_path=tmp_path, analysis_backend_path="/some/jar/dir") diff --git a/tests/analysis/python/test_python_backend_contract.py b/tests/analysis/python/test_python_backend_contract.py new file mode 100644 
index 0000000..c8af237 --- /dev/null +++ b/tests/analysis/python/test_python_backend_contract.py @@ -0,0 +1,47 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""The Python analysis backend contract (introspection only — no analyzer run needed).""" + +import re +from pathlib import Path + +import pytest + +from cldk.analysis.python.backend import PythonAnalysisBackend +from cldk.analysis.python.codeanalyzer.codeanalyzer import PyCodeanalyzer + + +def test_backend_subclasses_contract(): + assert issubclass(PyCodeanalyzer, PythonAnalysisBackend) + + +def test_contract_is_abstract(): + with pytest.raises(TypeError): + PythonAnalysisBackend() + + +def test_backend_fully_implements_contract(): + assert PyCodeanalyzer.__abstractmethods__ == frozenset() + + +def test_contract_covers_every_method_the_facade_delegates(): + """Every ``self.backend.X`` the PythonAnalysis facade calls must be on the contract.""" + facade_src = (Path(__file__).resolve().parents[3] / "cldk" / "analysis" / "python" / "python_analysis.py").read_text() + delegated = set(re.findall(r"self\.backend\.([a-zA-Z_]+)", facade_src)) + contract = {n for n in dir(PythonAnalysisBackend) if not n.startswith("__")} + missing = delegated - contract + assert not missing, f"facade delegates to backend methods absent 
from the contract: {sorted(missing)}" diff --git a/tests/analysis/typescript/__init__.py b/tests/analysis/typescript/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/analysis/typescript/conftest.py b/tests/analysis/typescript/conftest.py new file mode 100644 index 0000000..e58c385 --- /dev/null +++ b/tests/analysis/typescript/conftest.py @@ -0,0 +1,68 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""TypeScript test fixtures. + +Also overrides the heavy, network/zip-dependent session autouse fixtures from the top-level +``tests/conftest.py`` with no-ops, so the (fully mocked) TypeScript tests run in isolation +without downloading daytrader or extracting the Java/C sample zips. 
+""" + +import json +from pathlib import Path + +import pytest +import toml + + +def _testing_cfg() -> dict: + root = Path(__file__).resolve().parents[3] + return toml.load(root / "pyproject.toml")["tool"]["cldk"]["testing"] + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[3] + + +# --- neutralize the heavy autouse fixtures from the parent conftest for this subtree --- +@pytest.fixture(scope="session", autouse=True) +def test_fixture(): # noqa: D401 - override + yield None + + +@pytest.fixture(scope="session", autouse=True) +def test_fixture_pbw(): # noqa: D401 - override + yield None + + +@pytest.fixture(scope="session", autouse=True) +def test_fixture_binutils(): # noqa: D401 - override + yield None + + +# --- TypeScript-specific fixtures --- +@pytest.fixture(scope="session") +def typescript_application() -> Path: + """Path to the sample TypeScript application fixture.""" + return (_repo_root() / _testing_cfg()["sample-typescript-application"]).resolve() + + +@pytest.fixture(scope="session") +def typescript_analysis_json() -> str: + """The pre-computed analysis.json contents (as a JSON string) for the sample TS app.""" + path = _repo_root() / _testing_cfg()["sample-typescript-analysis-json"] / "slim" / "analysis.json" + with open(path, encoding="utf-8") as f: + return json.dumps(json.load(f)) diff --git a/tests/analysis/typescript/test_typescript_analysis.py b/tests/analysis/typescript/test_typescript_analysis.py new file mode 100644 index 0000000..14d01c1 --- /dev/null +++ b/tests/analysis/typescript/test_typescript_analysis.py @@ -0,0 +1,211 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Tests for the TypeScript analysis facade (backend subprocess mocked).""" + +from unittest.mock import MagicMock, patch + +import networkx as nx +import pytest + +from cldk import CLDK +from cldk.analysis import AnalysisLevel +from cldk.utils.exceptions import CldkInitializationException + + +@pytest.fixture +def ts_analysis(typescript_application, typescript_analysis_json, monkeypatch): + """Build a TypeScriptAnalysis with the codeanalyzer-typescript subprocess mocked to return + the pre-computed analysis.json fixture.""" + monkeypatch.setenv("CODEANALYZER_TS_BIN", "codeanalyzer-typescript") + with patch("cldk.analysis.typescript.codeanalyzer.codeanalyzer.subprocess.run") as run_mock: + run_mock.return_value = MagicMock(stdout=typescript_analysis_json, returncode=0) + return CLDK(language="typescript").analysis( + project_path=typescript_application, + analysis_backend_path=None, + eager=True, + analysis_level=AnalysisLevel.call_graph, + ) + + +def test_symbol_table_is_not_empty(ts_analysis): + symtab = ts_analysis.get_symbol_table() + assert symtab is not None + assert len(symtab) == 6 + assert "src/models.ts" in symtab + + +def test_call_graph_has_no_dangling_nodes(ts_analysis): + graph = ts_analysis.get_call_graph() + assert isinstance(graph, nx.DiGraph) + assert graph.number_of_edges() > 0 + # every edge endpoint is a node — internal callable OR phantom external symbol + nodes = set(graph.nodes) + for src, dst in graph.edges: + assert src in nodes + assert dst in 
nodes + + +def test_phantom_external_nodes(ts_analysis): + # imported Node-builtin calls become phantom (external) nodes, not dropped edges + ext = ts_analysis.get_external_symbols() + assert "node:crypto.createHash" in ext + assert ext["node:crypto.createHash"].module == "node:crypto" + assert ext["node:crypto.createHash"].is_external is True + assert "node:path.extname" in ext + + graph = ts_analysis.get_call_graph() + assert graph.has_edge("src/external.fingerprint", "node:crypto.createHash") + data = graph.get_edge_data("src/external.fingerprint", "node:crypto.createHash") + assert data["tags"].get("ts.external") == "true" + assert data["provenance"] == ["import"] + assert graph.nodes["node:crypto.createHash"]["external"] is True + # internal callers can be found via callees + callees = ts_analysis.get_callees("src/external.fingerprint") + assert "node:crypto.createHash" in {c["callee_signature"] for c in callees["callee_details"]} + + +def test_classes_interfaces_enums_type_aliases(ts_analysis): + classes = ts_analysis.get_classes() + assert "src/models.User" in classes + assert "src/services.UserService" in classes + assert set(ts_analysis.get_interfaces()) >= {"src/models.Identifiable", "src/models.Named"} + assert "src/models.Role" in ts_analysis.get_enums() + assert "src/models.UserId" in ts_analysis.get_type_aliases() + + +def test_class_inheritance_split(ts_analysis): + user = ts_analysis.get_class("src/models.User") + assert "src/models.Entity" in user.base_classes + assert ts_analysis.get_implemented_interfaces("src/models.User") == ["src/models.Named"] + assert "src/models.Entity" in ts_analysis.get_extended_classes("src/models.User") + assert user.is_abstract is False + + +def test_methods_and_constructor(ts_analysis): + methods = ts_analysis.get_methods_in_class("src/models.User") + assert "describe" in methods + assert "recordLogin" in methods + assert methods["recordLogin"].is_async is True + constructors = 
ts_analysis.get_constructors("src/models.User") + assert any(c.kind == "constructor" for c in constructors.values()) + + +def test_structured_decorators(ts_analysis): + decorated = ts_analysis.get_methods_with_decorators(["Controller", "Get"]) + assert any(sig.endswith("UserController.show") for sig in decorated["Get"]) + controller = ts_analysis.get_class("src/controllers.UserController") + assert [d.name for d in controller.decorators] == ["Controller"] + assert controller.decorators[0].positional_arguments == ['"/users"'] + + +def test_callers_and_callees(ts_analysis): + # bare-signature form (module-level function) + callees = ts_analysis.get_callees("src/index.main") + callee_sigs = {c["callee_signature"] for c in callees["callee_details"]} + assert "src/services.UserService.constructor" in callee_sigs + + # (class, method) form, with edge metadata surfaced + callers = ts_analysis.get_callers("src/services.UserService", "create") + assert callers["target_method"] == "src/services.UserService.create" + caller_sigs = {c["caller_signature"] for c in callers["caller_details"]} + assert "src/index.main" in caller_sigs + # the connecting edge carries provenance/tags + main_edge = next(c["edge"] for c in callers["caller_details"] if c["caller_signature"] == "src/index.main") + assert "provenance" in main_edge and "tags" in main_edge + + +def test_call_sites(ts_analysis): + # rich syntactic call sites inside a callable + sites = ts_analysis.get_call_sites("src/controllers.UserController.show") + assert any(cs.callee_signature == "src/services.UserService.create" for cs in sites) + create = next(cs for cs in sites if cs.callee_signature == "src/services.UserService.create") + assert create.receiver_type == "UserService" + assert create.start_line > 0 + + # project-wide calling lines for a target + lines = ts_analysis.get_calling_lines("src/services.UserService.create") + assert lines == sorted(lines) + assert create.start_line in lines + + # call targets derived from a 
callable's call sites + targets = ts_analysis.get_call_targets("src/controllers.UserController.show") + assert "src/services.UserService.create" in targets + + +def test_entrypoints_not_implemented(ts_analysis): + # entrypoint detection is a stub placeholder in the analyzer; methods exist for parity but raise + with pytest.raises(NotImplementedError): + ts_analysis.get_entry_point_methods() + with pytest.raises(NotImplementedError): + ts_analysis.get_service_entry_point_methods() + + +def test_enum_members_and_interface_properties(ts_analysis): + members = ts_analysis.get_enum_members("src/models.Role") + assert [m.name for m in members] == ["Admin", "Member", "Guest"] + props = ts_analysis.get_interface_properties("src/models.Named") + assert [p.name for p in props] == ["name"] + + +def test_exports_and_variables(ts_analysis): + exports = ts_analysis.get_exports() + variables = ts_analysis.get_variables() + # keyed by every analyzed file, even when empty + assert set(exports) == set(ts_analysis.get_symbol_table()) + assert set(variables) == set(ts_analysis.get_symbol_table()) + + +def test_class_decorators(ts_analysis): + decos = ts_analysis.get_class_decorators("src/controllers.UserController") + assert [d.name for d in decos] == ["Controller"] + by_name = ts_analysis.get_classes_with_decorators(["Controller"]) + assert "src/controllers.UserController" in by_name["Controller"] + method_decos = ts_analysis.get_decorators("src/controllers.UserController.show") + assert any(d.name == "Get" for d in method_decos) + + +def test_rta_subtype_expansion(ts_analysis): + graph = ts_analysis.get_call_graph() + announce = "src/services.announce" + targets = {dst: data for _, dst, data in graph.out_edges(announce, data=True)} + # declared-type edge to the interface method + RTA-expanded edges to the implementers + assert "src/models.Named.describe" in targets + assert targets["src/models.User.describe"]["tags"].get("ts.dispatch") == "rta" + assert 
targets["src/models.Robot.describe"]["tags"].get("ts.dispatch") == "rta" + + +def test_class_hierarchy_graph(ts_analysis): + hierarchy = ts_analysis.get_class_hierarchy() + assert hierarchy.has_edge("src/models.User", "src/models.Entity") + assert hierarchy.has_edge("src/models.User", "src/models.Named") + + +def test_namespace_members(ts_analysis): + classes = ts_analysis.get_classes() + assert "src/util.StringUtil.Builder" in classes + functions = ts_analysis.get_functions() + assert "src/util.StringUtil.repeat" in functions + + +def test_source_code_mode_rejected(typescript_application): + with pytest.raises(CldkInitializationException): + CLDK(language="typescript").analysis(source_code="const x = 1;") + + +def test_python_only_kwargs_rejected(typescript_application): + with pytest.raises(CldkInitializationException): + CLDK(language="typescript").analysis(project_path=typescript_application, cache_dir="/tmp/x") diff --git a/tests/analysis/typescript/test_typescript_backend_contract.py b/tests/analysis/typescript/test_typescript_backend_contract.py new file mode 100644 index 0000000..9ac7e06 --- /dev/null +++ b/tests/analysis/typescript/test_typescript_backend_contract.py @@ -0,0 +1,51 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +"""The TypeScript backend contract: both backends implement the same ABC (no live Neo4j needed).""" + +import inspect + +import pytest + +from cldk.analysis.typescript.backend import TSAnalysisBackend +from cldk.analysis.typescript.codeanalyzer.codeanalyzer import TSCodeanalyzer +from cldk.analysis.typescript.neo4j import TSNeo4jBackend + + +def test_backends_subclass_the_contract(): + assert issubclass(TSCodeanalyzer, TSAnalysisBackend) + assert issubclass(TSNeo4jBackend, TSAnalysisBackend) + + +def test_contract_is_abstract(): + with pytest.raises(TypeError): + TSAnalysisBackend() + + +@pytest.mark.parametrize("backend", [TSCodeanalyzer, TSNeo4jBackend]) +def test_backends_fully_implement_the_contract(backend): + # No abstract methods left unimplemented ⇒ the class is concrete/instantiable. + assert backend.__abstractmethods__ == frozenset() + + +@pytest.mark.parametrize("backend", [TSCodeanalyzer, TSNeo4jBackend]) +def test_signatures_match_the_contract(backend): + """Every abstract method's signature is preserved by each backend (params + defaults).""" + for name, base_method in inspect.getmembers(TSAnalysisBackend, predicate=inspect.isfunction): + if getattr(base_method, "__isabstractmethod__", False): + base_sig = inspect.signature(base_method) + impl_sig = inspect.signature(getattr(backend, name)) + assert impl_sig == base_sig, f"{backend.__name__}.{name} signature drifted: {impl_sig} != {base_sig}" diff --git a/tests/analysis/typescript/test_typescript_neo4j_backend.py b/tests/analysis/typescript/test_typescript_neo4j_backend.py new file mode 100644 index 0000000..52d570e --- /dev/null +++ b/tests/analysis/typescript/test_typescript_neo4j_backend.py @@ -0,0 +1,253 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this 
file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Integration tests for the Neo4j-backed TypeScript analysis backend. + +These exercise the *real* pipeline: the ``codeanalyzer-typescript`` binary pushes the sample +app's graph into a live Neo4j over Bolt (``--emit neo4j``), and every assertion is answered by +Cypher in :class:`TSNeo4jBackend`. They mirror the in-memory backend's expectations from +``test_typescript_analysis.py`` so the two backends are proven to agree. + +The whole module is skipped unless a Neo4j server is reachable. Point the tests at one with: + + CLDK_TEST_NEO4J_URI=bolt://localhost:7687 \ + CLDK_TEST_NEO4J_USER=neo4j \ + CLDK_TEST_NEO4J_PASSWORD=test \ + pytest tests/analysis/typescript/test_typescript_neo4j_backend.py + +(e.g. `docker run -p 7687:7687 -e NEO4J_AUTH=neo4j/test neo4j:5`). The binary is resolved the +usual way: ``$CODEANALYZER_TS_BIN``, the ``codeanalyzer-typescript`` wheel, or a bundled binary. 
+""" + +import logging +import os + +import networkx as nx +import pytest + +from cldk import CLDK +from cldk.analysis import AnalysisLevel +from cldk.analysis.typescript.neo4j import Neo4jConnectionConfig + +logging.getLogger("neo4j").setLevel(logging.ERROR) + +NEO4J_URI = os.environ.get("CLDK_TEST_NEO4J_URI", "bolt://localhost:7687") +NEO4J_USER = os.environ.get("CLDK_TEST_NEO4J_USER", "neo4j") +NEO4J_PASSWORD = os.environ.get("CLDK_TEST_NEO4J_PASSWORD", "neo4j") +APP_NAME = "application" + + +def _neo4j_reachable() -> bool: + try: + from neo4j import GraphDatabase + except ModuleNotFoundError: + return False + try: + driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) + driver.verify_connectivity() + driver.close() + return True + except Exception: # noqa: BLE001 - any connection failure ⇒ skip + return False + + +pytestmark = pytest.mark.skipif( + not _neo4j_reachable(), + reason=f"no Neo4j reachable at {NEO4J_URI} (set CLDK_TEST_NEO4J_URI / _USER / _PASSWORD)", +) + + +@pytest.fixture(scope="module") +def ts_neo4j(typescript_application): + """A TypeScript facade backed by Neo4j, built by pushing the sample app over Bolt.""" + config = Neo4jConnectionConfig( + uri=NEO4J_URI, + username=NEO4J_USER, + password=NEO4J_PASSWORD, + application_name=APP_NAME, + build_db=True, + ) + analysis = CLDK(language="typescript").analysis( + project_path=typescript_application, + analysis_level=AnalysisLevel.call_graph, + eager=True, # force a clean rebuild of this app's subgraph + neo4j_config=config, + ) + yield analysis + analysis.backend.close() + + +def test_backend_is_neo4j(ts_neo4j): + from cldk.analysis.typescript.neo4j import TSNeo4jBackend + + assert isinstance(ts_neo4j.backend, TSNeo4jBackend) + + +def test_symbol_table(ts_neo4j): + symtab = ts_neo4j.get_symbol_table() + assert len(symtab) == 5 + assert "src/models.ts" in symtab + assert "src/controllers.ts" in symtab + + +def test_classes_interfaces_enums_type_aliases(ts_neo4j): + classes = 
ts_neo4j.get_classes() + assert "src/models.User" in classes + assert "src/services.UserService" in classes + assert set(ts_neo4j.get_interfaces()) >= {"src/models.Identifiable", "src/models.Named"} + assert "src/models.Role" in ts_neo4j.get_enums() + assert "src/models.UserId" in ts_neo4j.get_type_aliases() + + +def test_class_inheritance_split(ts_neo4j): + user = ts_neo4j.get_class("src/models.User") + assert "src/models.Entity" in user.base_classes + assert ts_neo4j.get_implemented_interfaces("src/models.User") == ["src/models.Named"] + assert "src/models.Entity" in ts_neo4j.get_extended_classes("src/models.User") + assert user.is_abstract is False + + +def test_methods_and_constructor(ts_neo4j): + methods = ts_neo4j.get_methods_in_class("src/models.User") + assert "describe" in methods + assert "recordLogin" in methods + assert methods["recordLogin"].is_async is True + constructors = ts_neo4j.get_constructors("src/models.User") + assert any(c.kind == "constructor" for c in constructors.values()) + + +def test_fields_and_parameters(ts_neo4j): + fields = {f.name for f in ts_neo4j.get_fields("src/models.User")} + assert {"name", "role"} <= fields + params = ts_neo4j.get_method_parameters("src/services.UserService", "create") + assert isinstance(params, list) + + +def test_structured_decorators(ts_neo4j): + decorated = ts_neo4j.get_methods_with_decorators(["Controller", "Get"]) + assert any(sig.endswith("UserController.show") for sig in decorated["Get"]) + controller = ts_neo4j.get_class("src/controllers.UserController") + assert [d.name for d in controller.decorators] == ["Controller"] + assert controller.decorators[0].positional_arguments == ['"/users"'] + + +def test_class_decorators_query(ts_neo4j): + classes = ts_neo4j.get_classes_with_decorators(["Controller"]) + assert any(sig.endswith("UserController") for sig in classes["Controller"]) + + +def test_call_graph_no_dangling_nodes(ts_neo4j): + graph = ts_neo4j.get_call_graph() + assert isinstance(graph, 
nx.DiGraph) + assert graph.number_of_edges() > 0 + nodes = set(graph.nodes) + for src, dst in graph.edges: + assert src in nodes + assert dst in nodes + # edge metadata is surfaced just like the in-memory backend + src, dst = next(iter(graph.edges)) + data = graph.get_edge_data(src, dst) + assert data["type"] == "CALL_DEP" + assert "provenance" in data and "tags" in data + + +def test_callers_and_callees(ts_neo4j): + # bare-signature form (module-level function) + callees = ts_neo4j.get_callees("src/index.main") + callee_sigs = {c["callee_signature"] for c in callees["callee_details"]} + assert "src/services.UserService.constructor" in callee_sigs + + # (class, method) form, with edge metadata surfaced + callers = ts_neo4j.get_callers("src/services.UserService", "create") + assert callers["target_method"] == "src/services.UserService.create" + caller_sigs = {c["caller_signature"] for c in callers["caller_details"]} + assert "src/index.main" in caller_sigs + main_edge = next(c["edge"] for c in callers["caller_details"] if c["caller_signature"] == "src/index.main") + assert "provenance" in main_edge and "tags" in main_edge + + +def test_call_sites(ts_neo4j): + sites = ts_neo4j.get_call_sites("src/controllers.UserController.show") + assert any(cs.callee_signature == "src/services.UserService.create" for cs in sites) + create = next(cs for cs in sites if cs.callee_signature == "src/services.UserService.create") + assert create.receiver_type == "UserService" + assert create.start_line > 0 + + lines = ts_neo4j.get_calling_lines("src/services.UserService.create") + assert lines == sorted(lines) + assert all(line > 0 for line in lines) + + targets = ts_neo4j.get_call_targets("src/controllers.UserController.show") + assert "src/services.UserService.create" in targets + + +def test_class_call_graph(ts_neo4j): + edges = ts_neo4j.get_class_call_graph("src/controllers.UserController") + assert all(isinstance(e, tuple) and len(e) == 2 for e in edges) + flat = {s for s, _ in 
edges} | {t for _, t in edges} + assert any("UserController" in s for s in flat) + + +def test_class_hierarchy(ts_neo4j): + hierarchy = ts_neo4j.get_class_hierarchy() + assert isinstance(hierarchy, nx.DiGraph) + assert hierarchy.has_edge("src/models.User", "src/models.Entity") + + +def test_enum_members(ts_neo4j): + members = ts_neo4j.get_enum_members("src/models.Role") + assert len(members) > 0 + assert all(m.name for m in members) + + +def test_typescript_file_lookup(ts_neo4j): + assert ts_neo4j.get_typescript_file("src/models.User") == "src/models.ts" + + +def test_application_view_round_trips(ts_neo4j): + app = ts_neo4j.get_application_view() + assert set(app.symbol_table) == set(ts_neo4j.get_symbol_table()) + assert len(app.call_graph) == ts_neo4j.get_call_graph().number_of_edges() + + +def test_lazy_skips_rebuild(ts_neo4j, typescript_application): + """A second, non-eager backend against the already-loaded DB must not re-run the analyzer. + + Depends on ``ts_neo4j`` so the module fixture has already populated this app's subgraph. + """ + from unittest.mock import patch + + from cldk.analysis.typescript.neo4j import TSNeo4jBackend + + with patch.object(TSNeo4jBackend, "_get_codeanalyzer_exec") as exec_mock: + backend = TSNeo4jBackend( + project_dir=str(typescript_application), + analysis_backend_path=None, + analysis_level=AnalysisLevel.call_graph, + eager_analysis=False, + target_files=None, + neo4j_uri=NEO4J_URI, + neo4j_username=NEO4J_USER, + neo4j_password=NEO4J_PASSWORD, + application_name=APP_NAME, + build_db=True, + ) + try: + # The app already exists from the module fixture ⇒ lazy path, binary never resolved. 
+ exec_mock.assert_not_called() + assert len(backend.get_all_classes()) == 6 + finally: + backend.close() diff --git a/tests/analysis/typescript/test_typescript_neo4j_selection.py b/tests/analysis/typescript/test_typescript_neo4j_selection.py new file mode 100644 index 0000000..f22f319 --- /dev/null +++ b/tests/analysis/typescript/test_typescript_neo4j_selection.py @@ -0,0 +1,107 @@ +################################################################################ +# Copyright IBM Corporation 2026 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Backend-selection unit tests for the TypeScript facade (no live Neo4j required). + +The Neo4j backend is fully mocked here, so these run anywhere. They verify that passing a +``Neo4jConnectionConfig`` swaps the facade onto :class:`TSNeo4jBackend` and that the facade's +``get_*`` methods are thin delegations to whichever backend is wired in. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +from cldk import CLDK +from cldk.analysis import AnalysisLevel +from cldk.analysis.typescript.neo4j import Neo4jConnectionConfig + + +def test_neo4j_config_selects_neo4j_backend(typescript_application): + config = Neo4jConnectionConfig( + uri="bolt://example:7687", + username="neo4j", + password="secret", + application_name="myapp", + build_db=False, + ) + with patch("cldk.analysis.typescript.typescript_analysis.TSNeo4jBackend") as backend_cls: + backend = backend_cls.return_value + backend.get_application.return_value = MagicMock() + + analysis = CLDK(language="typescript").analysis( + project_path=typescript_application, + analysis_level=AnalysisLevel.call_graph, + neo4j_config=config, + ) + + # The neo4j backend was constructed with the config's connection details... + _, kwargs = backend_cls.call_args + assert kwargs["neo4j_uri"] == "bolt://example:7687" + assert kwargs["neo4j_password"] == "secret" + assert kwargs["application_name"] == "myapp" + assert kwargs["build_db"] is False + assert analysis.backend is backend + + # ...and a representative query delegates straight to it. 
+ analysis.get_call_graph() + backend.get_call_graph.assert_called_once() + analysis.get_classes() + backend.get_all_classes.assert_called_once() + + +def test_no_config_uses_in_memory_backend(typescript_application): + with patch("cldk.analysis.typescript.typescript_analysis.TSCodeanalyzer") as backend_cls, patch("cldk.analysis.typescript.typescript_analysis.TSNeo4jBackend") as neo4j_cls: + backend_cls.return_value.get_application.return_value = MagicMock() + + analysis = CLDK(language="typescript").analysis( + project_path=typescript_application, + analysis_level=AnalysisLevel.symbol_table, + ) + + backend_cls.assert_called_once() + neo4j_cls.assert_not_called() + assert analysis.backend is backend_cls.return_value + + +def test_missing_neo4j_driver_raises_helpful_error(): + """Without the optional ``neo4j`` driver, constructing the backend explains how to install it.""" + import builtins + + from cldk.analysis.typescript.neo4j import TSNeo4jBackend + from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException + + real_import = builtins.__import__ + + def _no_neo4j(name, *args, **kwargs): + if name == "neo4j": + raise ModuleNotFoundError("No module named 'neo4j'") + return real_import(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=_no_neo4j): + with pytest.raises(CodeanalyzerExecutionException, match="neo4j"): + TSNeo4jBackend( + project_dir=None, + analysis_backend_path=None, + analysis_level=AnalysisLevel.symbol_table, + eager_analysis=False, + target_files=None, + neo4j_uri="bolt://example:7687", + neo4j_username="neo4j", + neo4j_password="neo4j", + application_name="app", + build_db=False, + ) diff --git a/tests/resources/typescript/application/src/controllers.ts b/tests/resources/typescript/application/src/controllers.ts new file mode 100644 index 0000000..c26e733 --- /dev/null +++ b/tests/resources/typescript/application/src/controllers.ts @@ -0,0 +1,28 @@ +import { UserService } from "./services"; + +// Minimal 
decorator factories (NestJS/Angular-flavored) to exercise structured TSDecorator capture. +function Controller(prefix: string): ClassDecorator { + return () => undefined; +} +function Get(path: string): MethodDecorator { + return () => undefined; +} +function Param(name: string): ParameterDecorator { + return () => undefined; +} + +@Controller("/users") +export class UserController { + constructor(private readonly service: UserService) {} + + @Get("/:id") + show(@Param("id") id: string): string { + const user = this.service.create(id); + return user.describe(); + } + + @Get("/") + list(): string[] { + return this.service.describeAll(); + } +} diff --git a/tests/resources/typescript/application/src/index.ts b/tests/resources/typescript/application/src/index.ts new file mode 100644 index 0000000..f628180 --- /dev/null +++ b/tests/resources/typescript/application/src/index.ts @@ -0,0 +1,25 @@ +import { UserController } from "./controllers"; +import { Robot, Role, User } from "./models"; +import { UserService, announce } from "./services"; +import { StringUtil } from "./util"; + +export function main(): void { + const service = new UserService(100); + service.create("Ada", Role.Admin); + service.createGuest(); + + const controller = new UserController(service); + controller.list(); + controller.show("42"); + + // interface-typed dispatch — RTA should expand announce -> {User,Robot}.describe + announce(new User(1, "Ada", Role.Admin)); + announce(new Robot("r2d2")); + + const slug = StringUtil.repeat("hello world", 2); + const builder = new StringUtil.Builder(); + builder.add("a").add("b").build(); + console.log(slug); +} + +main(); diff --git a/tests/resources/typescript/application/src/models.ts b/tests/resources/typescript/application/src/models.ts new file mode 100644 index 0000000..fb80ce1 --- /dev/null +++ b/tests/resources/typescript/application/src/models.ts @@ -0,0 +1,64 @@ +/** Domain models for the sample app. 
*/ + +export interface Identifiable<T> { + readonly id: T; +} + +export interface Named { + name: string; + describe(): string; +} + +export type UserId = string | number; + +export enum Role { + Admin = "admin", + Member = "member", + Guest = "guest", +} + +export const enum Flag { + None = 0, + Active = 1, +} + +/** A user of the system. */ +export abstract class Entity<ID> implements Identifiable<ID> { + constructor(public readonly id: ID) {} + abstract describe(): string; +} + +export class User extends Entity<UserId> implements Named { + private loginCount = 0; + static instances = 0; + + constructor( + id: UserId, + public name: string, + private role: Role = Role.Member, + ) { + super(id); + User.instances++; + } + + get isAdmin(): boolean { + return this.role === Role.Admin; + } + + describe(): string { + return `${this.name} (${this.role})`; + } + + async recordLogin(): Promise<number> { + this.loginCount += 1; + return this.loginCount; + } +} + +/** A second, unrelated implementer of Named — drives RTA subtype expansion. */ +export class Robot implements Named { + constructor(public name: string) {} + describe(): string { + return `robot:${this.name}`; + } +} diff --git a/tests/resources/typescript/application/src/services.ts b/tests/resources/typescript/application/src/services.ts new file mode 100644 index 0000000..79e63ce --- /dev/null +++ b/tests/resources/typescript/application/src/services.ts @@ -0,0 +1,47 @@ +import { type Named, Role, User, type UserId } from "./models"; + +/** Pure helper — top-level function. */ +export function makeGuestName(seed: number): string { + return `guest-${seed}`; +} + +/** + * Calls describe() on an interface-typed receiver. Under RTA this expands to every instantiated + * concrete implementer of Named (User, Robot, ...). + */ +export function announce(thing: Named): string { + return thing.describe(); +} + +/** Arrow function bound to a const (function_expression-style callable). 
*/ +export const nextId = (n: number): UserId => n + 1; + +export class UserService { + private users: User[] = []; + + constructor(private readonly startId: number = 0) {} + + create(name: string, role: Role = Role.Member): User { + const id = nextId(this.users.length + this.startId); + const user = new User(id, name, role); + this.users.push(user); + return user; + } + + createGuest(): User { + const name = makeGuestName(this.users.length); + return this.create(name, Role.Guest); + } + + describeAll(): string[] { + return this.users.map((u) => u.describe()); + } + + async loginAll(): Promise<number> { + let total = 0; + for (const u of this.users) { + total += await u.recordLogin(); + } + return total; + } +} diff --git a/tests/resources/typescript/application/src/util.ts b/tests/resources/typescript/application/src/util.ts new file mode 100644 index 0000000..19e0345 --- /dev/null +++ b/tests/resources/typescript/application/src/util.ts @@ -0,0 +1,34 @@ +/** A namespace with nested declarations, to exercise the namespaces{} collection + nested signatures. */ +export namespace StringUtil { + export function repeat(s: string, n: number): string { + return slug(s).repeat(n); + } + + export function slug(s: string): string { + return s.toLowerCase().replace(/\s+/g, "-"); + } + + export class Builder { + private parts: string[] = []; + add(part: string): this { + this.parts.push(slug(part)); + return this; + } + build(): string { + return this.parts.join("/"); + } + } +} + +/** Generic top-level function with a nested helper, to exercise inner_callables + generics. */ +export function classify<T extends { name: string }>(items: T[]): Record<string, T[]> { + function keyOf(item: T): string { + return item.name.charAt(0); + } + const out: Record<string, T[]> = {}; + for (const item of items) { + const k = keyOf(item); + (out[k] ??= []).push(item); + } + return out; +}