Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Neo4j-backed TypeScript analysis backend (`cldk.analysis.typescript.neo4j.TSNeo4jBackend`). It
is a drop-in alternative to the in-memory `TSCodeanalyzer`: it answers the **same** `get_*`
query surface (call graph, callers/callees, class hierarchy, call sites, decorators, symbol
lookups, ...) by running **Cypher over a live Neo4j graph** instead of walking the pydantic /
NetworkX structures. The graph is the one `codeanalyzer-typescript` emits with `--emit neo4j`
(schema `schema.neo4j.json`); the backend can populate the database for you over Bolt, or query
one that is already loaded.
- `TypeScriptAnalysis` / `CLDK.analysis(language="typescript")` now accept an optional
`neo4j_config` (`Neo4jConnectionConfig`) to select the Neo4j backend; without it the in-memory
backend is used, unchanged.
- Optional `neo4j` extra (`pip install cldk[neo4j]`) for the Neo4j Python driver.

## [v1.0.7] - 2026-02-14

### Added
Expand Down
213 changes: 213 additions & 0 deletions cldk/analysis/java/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
################################################################################
# Copyright IBM Corporation 2026
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

"""The Java analysis backend contract.

:class:`JavaAnalysis` is a (mostly) thin façade that delegates its static-analysis queries to a
*backend*. Today the only backend is :class:`~cldk.analysis.java.codeanalyzer.JCodeanalyzer`
(in-memory pydantic / NetworkX over the codeanalyzer JSON); this ABC formalizes the surface the
façade depends on so an alternative backend (e.g. a forthcoming Neo4j/Cypher backend, mirroring
the TypeScript :class:`~cldk.analysis.typescript.neo4j.TSNeo4jBackend`) can be dropped in and
selected without touching the façade.

The contract is enforced by the type system and, because every method is abstract, at instantiation
time, rather than being matched only by convention. Note the façade also calls Tree-sitter directly
for a few parsing/sanitization helpers
(e.g. ``is_parsable``, ``get_raw_ast``); those are not part of the backend contract — only the
analysis queries the façade routes through ``self.backend`` are.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Dict, List, Tuple, Union

import networkx as nx

from cldk.models.java.models import (
JApplication,
JCallable,
JCallableParameter,
JComment,
JCompilationUnit,
JCRUDOperation,
JField,
JMethodDetail,
JType,
)

# Shape of one row returned by the CRUD queries: a string-keyed mapping whose values are the owning
# type (``JType``), the callable (``JCallable``), or the list of operations found within it
# (``List[JCRUDOperation]``).
# NOTE(review): the key names are not fixed by this alias — confirm against the concrete backend.
CRUDRow = Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]


class JavaAnalysisBackend(ABC):
    """Interface that every Java analysis backend must satisfy.

    The :class:`JavaAnalysis` façade forwards its analysis queries to a backend, and the backend
    is responsible for the indexing and query logic behind them: symbol table, call graph,
    class/method/field navigation, entry points, CRUD operations, and comments/docstrings.

    To keep backends behaviorally interchangeable, implementations are expected to hand back the
    canonical ``cldk.models.java`` pydantic objects, or the documented NetworkX / dict / list
    shapes. Every method declared here is abstract, so a subclass that leaves any of them
    unimplemented cannot be instantiated.
    """

    # ------------------------------------------------------------------
    # Whole-application queries
    # ------------------------------------------------------------------
    @abstractmethod
    def get_application_view(self) -> JApplication:
        """Return the view of the entire application."""

    @abstractmethod
    def get_symbol_table(self) -> Dict[str, JCompilationUnit]:
        """Return the symbol table: one compilation unit per file, keyed by file path."""

    @abstractmethod
    def get_compilation_units(self) -> List[JCompilationUnit]:
        """Return every compilation unit of the application."""

    @abstractmethod
    def get_java_file(self, qualified_class_name: str) -> str:
        """Return the path of the file in which the given class is declared."""

    @abstractmethod
    def get_java_compilation_unit(self, file_path: str) -> JCompilationUnit:
        """Return the compilation unit that corresponds to the given file path."""

    # ------------------------------------------------------------------
    # Call graph
    # ------------------------------------------------------------------
    @abstractmethod
    def get_call_graph(self) -> nx.DiGraph:
        """Return the application's call edges as a NetworkX ``DiGraph``."""

    @abstractmethod
    def get_call_graph_json(self) -> str:
        """Return the call graph in its JSON-serialized form."""

    @abstractmethod
    def get_all_callers(
        self,
        target_class_name: str,
        target_method_signature: str,
        using_symbol_table: bool,
    ) -> Dict:
        """Return the callers of the given method.

        NOTE(review): ``using_symbol_table`` presumably switches the lookup to the symbol table
        instead of the call graph; confirm against the concrete backend.
        """

    @abstractmethod
    def get_all_callees(
        self,
        source_class_name: str,
        source_method_signature: str,
        using_symbol_table: bool,
    ) -> Dict:
        """Return the callees of the given method.

        NOTE(review): ``using_symbol_table`` presumably switches the lookup to the symbol table
        instead of the call graph; confirm against the concrete backend.
        """

    @abstractmethod
    def get_class_call_graph(
        self,
        qualified_class_name: str,
        method_name: str | None = None,
    ) -> List[Tuple[JMethodDetail, JMethodDetail]]:
        """Return the call-graph edges reachable from a class, or from one of its methods."""

    @abstractmethod
    def get_class_call_graph_using_symbol_table(
        self,
        qualified_class_name: str,
        method_signature: str | None = None,
    ) -> List[Tuple[JMethodDetail, JMethodDetail]]:
        """Return the call-graph edges reachable from a class, derived from the symbol table only."""

    # ------------------------------------------------------------------
    # Classes, methods, and fields
    # ------------------------------------------------------------------
    @abstractmethod
    def get_all_classes(self) -> Dict[str, JType]:
        """Return all classes, keyed by qualified class name."""

    @abstractmethod
    def get_class(self, qualified_class_name: str) -> JType:
        """Return the class with the given qualified name."""

    @abstractmethod
    def get_all_sub_classes(self, qualified_class_name: str) -> Dict[str, JType]:
        """Return the classes that extend or implement the given class."""

    @abstractmethod
    def get_all_nested_classes(self, qualified_class_name: str) -> List[JType]:
        """Return the classes declared inside the given class."""

    @abstractmethod
    def get_extended_classes(self, qualified_class_name: str) -> List[str]:
        """Return the base classes that the given class extends."""

    @abstractmethod
    def get_implemented_interfaces(self, qualified_class_name: str) -> List[str]:
        """Return the interfaces that the given class implements."""

    @abstractmethod
    def get_all_methods_in_application(self) -> Dict[str, Dict[str, JCallable]]:
        """Return every method, grouped under the qualified name of its owning class."""

    @abstractmethod
    def get_all_methods_in_class(self, qualified_class_name: str) -> Dict[str, JCallable]:
        """Return the methods of the given class."""

    @abstractmethod
    def get_method(self, qualified_class_name: str, method_signature: str) -> JCallable:
        """Return one method of the given class."""

    @abstractmethod
    def get_method_parameters(
        self,
        qualified_class_name: str,
        method_signature: str,
    ) -> List[JCallableParameter]:
        """Return the parameters of the given method."""

    @abstractmethod
    def get_all_constructors(self, qualified_class_name: str) -> Dict[str, JCallable]:
        """Return the constructors of the given class."""

    @abstractmethod
    def get_all_fields(self, qualified_class_name: str) -> List[JField]:
        """Return the fields of the given class."""

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------
    @abstractmethod
    def get_all_entry_point_methods(self) -> Dict[str, Dict[str, JCallable]]:
        """Return the methods identified as entry points of the application."""

    @abstractmethod
    def get_all_entry_point_classes(self) -> Dict[str, JType]:
        """Return the classes identified as entry points of the application."""

    # ------------------------------------------------------------------
    # CRUD operations
    # ------------------------------------------------------------------
    @abstractmethod
    def get_all_crud_operations(self) -> List[CRUDRow]:
        """Return every CRUD operation found in the application."""

    @abstractmethod
    def get_all_create_operations(self) -> List[CRUDRow]:
        """Return every create operation."""

    @abstractmethod
    def get_all_read_operations(self) -> List[CRUDRow]:
        """Return every read operation."""

    @abstractmethod
    def get_all_update_operations(self) -> List[CRUDRow]:
        """Return every update operation."""

    @abstractmethod
    def get_all_delete_operations(self) -> List[CRUDRow]:
        """Return every delete operation."""

    # ------------------------------------------------------------------
    # Comments and docstrings
    # ------------------------------------------------------------------
    @abstractmethod
    def get_all_comments(self) -> Dict[str, List[JComment]]:
        """Return every comment in the application, keyed by file."""

    @abstractmethod
    def get_comment_in_file(self, file_path: str) -> List[JComment]:
        """Return the comments found in the given file."""

    @abstractmethod
    def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]:
        """Return the comments found in the given class."""

    @abstractmethod
    def get_comments_in_a_method(
        self,
        qualified_class_name: str,
        method_signature: str,
    ) -> List[JComment]:
        """Return the comments found in the given method."""

    @abstractmethod
    def get_all_docstrings(self) -> List[Tuple[str, JComment]]:
        """Return every docstring-style comment in the application."""

    @abstractmethod
    def remove_all_comments(self, src_code: str) -> str:
        """Return ``src_code`` with all of its comments stripped."""
3 changes: 2 additions & 1 deletion cldk/analysis/java/codeanalyzer/codeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import networkx as nx

from cldk.analysis import AnalysisLevel
from cldk.analysis.java.backend import JavaAnalysisBackend
from cldk.analysis.commons.treesitter import TreesitterJava
from cldk.models.java import JGraphEdges
from cldk.models.java.enums import CRUDOperationType
Expand All @@ -37,7 +38,7 @@
logger = logging.getLogger(__name__)


class JCodeanalyzer:
class JCodeanalyzer(JavaAnalysisBackend):
"""A class for building the application view of a Java application using Codeanalyzer.

Args:
Expand Down
3 changes: 2 additions & 1 deletion cldk/analysis/java/java_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from cldk.models.java import JApplication
from cldk.models.java.models import JCRUDOperation, JComment, JCompilationUnit, JMethodDetail, JType, JField
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
from cldk.analysis.java.backend import JavaAnalysisBackend


class JavaAnalysis:
Expand Down Expand Up @@ -154,7 +155,7 @@ def __init__(
self.target_files = target_files
self.treesitter_java: TreesitterJava = TreesitterJava()
# Initialize the analysis analysis_backend
self.backend: JCodeanalyzer = JCodeanalyzer(
self.backend: JavaAnalysisBackend = JCodeanalyzer(
project_dir=self.project_dir,
source_code=self.source_code,
eager_analysis=self.eager_analysis,
Expand Down
Loading