From 7ba421d7932bdb9ad0badfa70ad4e001bda7b837 Mon Sep 17 00:00:00 2001 From: stacknil Date: Mon, 29 Jun 2026 19:42:49 +0800 Subject: [PATCH] test(contract): cover five demo schemas --- README.md | 2 +- docs/evidence-pipeline-contract.md | 8 +- docs/reviewer-pack.md | 8 +- docs/roadmap.md | 2 +- schemas/cloud_iam_findings.schema.json | 104 ++++++++++++++++++++++ schemas/cloud_iam_summary.schema.json | 57 ++++++++++++ schemas/investigation_summary.schema.json | 83 +++++++---------- schemas/telemetry_summary.schema.json | 47 ++++++++++ tests/test_evidence_pipeline_schemas.py | 58 +++++++++++- 9 files changed, 308 insertions(+), 61 deletions(-) create mode 100644 schemas/cloud_iam_findings.schema.json create mode 100644 schemas/cloud_iam_summary.schema.json create mode 100644 schemas/telemetry_summary.schema.json diff --git a/README.md b/README.md index ec9184b..2c38330 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,7 @@ Cooldown behavior: - treat v1.0 as a five-demo contract freeze, not a feature expansion - stabilize the five-demo matrix and avoid broad platform expansion - freeze reviewer-visible artifact names unless a rename is intentionally coordinated across docs, tests, and sample outputs -- keep JSON schema contracts aligned with selected reviewer-facing evidence artifacts +- keep JSON schema contracts aligned with reviewer-facing JSON evidence artifacts across the five-demo matrix - keep committed artifacts aligned with regenerated pipeline output through `python scripts/regenerate_artifacts.py --check` - add a reviewer-facing artifact diff for each release, using `no-artifact-change` when committed reviewer artifacts are unchanged - use [`docs/reviewer-pack.md`](docs/reviewer-pack.md) and [`docs/architecture.md`](docs/architecture.md) as the consolidation entrypoints diff --git a/docs/evidence-pipeline-contract.md b/docs/evidence-pipeline-contract.md index c9212e9..971349d 100644 --- a/docs/evidence-pipeline-contract.md +++ b/docs/evidence-pipeline-contract.md @@ -1,6 +1,6 @@ # Evidence Pipeline Contract -`telemetry-lab` v1.0 treats reviewer-facing JSON artifacts as evidence pipeline contracts. The schemas below define the current machine-readable artifact shapes for selected demo outputs without turning the repo into a SIEM, dashboard, or monitoring platform. +`telemetry-lab` v1.0 treats reviewer-facing JSON artifacts as evidence pipeline contracts. The schemas below define the current machine-readable artifact shapes across the five-demo matrix without turning the repo into a SIEM, dashboard, or monitoring platform. Use [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`. @@ -15,11 +15,13 @@ The contract is intentionally local and file-based: | Schema | Demo artifact | What it locks | | --- | --- | --- | +| `schemas/telemetry_summary.schema.json` | `data/processed/summary.json`, `data/processed/richer_sample/summary.json` | telemetry-window run counts, rule counts, cooldown, and generated artifact references | | `schemas/rule_hits.schema.json` | `demos/ai-assisted-detection-demo/artifacts/rule_hits.json` | deterministic rule-hit fields before case grouping | | `schemas/case_bundles.schema.json` | `demos/ai-assisted-detection-demo/artifacts/case_bundles.json` | bounded case bundles passed to JSON-only drafting | | `schemas/dedup_explanations.schema.json` | `demos/rule-evaluation-and-dedup-demo/artifacts/dedup_explanations.json` | retained/suppressed cooldown explanations | -| `schemas/investigation_signals.schema.json` | `demos/cloud-iam-change-investigation-demo/artifacts/investigation_signals.json` | bounded CloudTrail-like investigation signals | -| `schemas/investigation_summary.schema.json` | `demos/cloud-iam-change-investigation-demo/artifacts/investigation_summary.json` | Cloud IAM investigation summary and time-model metadata | +| `schemas/investigation_summary.schema.json` | `demos/config-change-investigation-demo/artifacts/investigation_summary.json` | config-change investigation summaries and bounded evidence counts | +| `schemas/cloud_iam_findings.schema.json` | `demos/cloud-iam-change-investigation-demo/artifacts/investigation_signals.json` | bounded CloudTrail-like IAM findings | +| `schemas/cloud_iam_summary.schema.json` | `demos/cloud-iam-change-investigation-demo/artifacts/investigation_summary.json` | Cloud IAM investigation summary and time-model metadata | ## Contract Rules diff --git a/docs/reviewer-pack.md b/docs/reviewer-pack.md index cde7f84..e15891c 100644 --- a/docs/reviewer-pack.md +++ b/docs/reviewer-pack.md @@ -40,7 +40,7 @@ The current artifact names are reviewer-facing contracts for the v1 reviewer con ## Evidence Pipeline Contract -See [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md) for the v1 JSON schema contract covering selected reviewer-facing evidence artifacts. +See [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md) for the v1 JSON schema contract covering reviewer-facing JSON evidence artifacts across the five-demo matrix. See [`docs/vocabulary.md`](vocabulary.md) for the cross-demo meaning of `event`, `signal`, `hit`, `finding`, `case_bundle`, `summary`, `report`, and `audit_trace`. @@ -51,8 +51,10 @@ The current schema contract covers: - `schemas/rule_hits.schema.json` - `schemas/case_bundles.schema.json` - `schemas/dedup_explanations.schema.json` -- `schemas/investigation_signals.schema.json` - `schemas/investigation_summary.schema.json` +- `schemas/cloud_iam_findings.schema.json` +- `schemas/cloud_iam_summary.schema.json` +- `schemas/telemetry_summary.schema.json` ### Stable Reviewer-Visible Artifacts @@ -103,7 +105,7 @@ Use the same Python interpreter for install, tests, and demo commands. - [`docs/reviewer-brief.md`](reviewer-brief.md): short problem / value summary - [`docs/reviewer-path.md`](reviewer-path.md): demo choice by review question - [`docs/v1-contract-freeze.md`](v1-contract-freeze.md): v1.0 five-demo contract freeze and release drift note -- [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for selected evidence artifacts +- [`docs/evidence-pipeline-contract.md`](evidence-pipeline-contract.md): JSON schema contracts for five-demo evidence artifacts - [`docs/reviewer-artifact-diff.md`](reviewer-artifact-diff.md): release diff contract for reviewer-facing artifact changes - [`docs/vocabulary.md`](vocabulary.md): cross-demo evidence workflow vocabulary - [`docs/architecture.md`](architecture.md): local file-based workflow diagram diff --git a/docs/roadmap.md b/docs/roadmap.md index 5e415f8..d5b085c 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -25,7 +25,7 @@ Recently added: 1. Stabilize the demo matrix. 2. Treat v1.0 as a five-demo contract freeze, not a feature expansion. 3. Freeze reviewer-visible artifact names unless a rename is intentional and documented across README, reviewer docs, demo docs, tests, and sample outputs. -4. Keep JSON schema contracts aligned with selected reviewer-facing evidence artifacts. +4. Keep JSON schema contracts aligned with reviewer-facing JSON evidence artifacts across the five-demo matrix. 5. Keep committed evidence artifacts aligned with regenerated pipeline output through `python scripts/regenerate_artifacts.py --check`. 6. Keep cross-demo vocabulary stable for evidence workflow terms. 7. Include reviewer-facing artifact diffs in every release, including explicit `no-artifact-change` notes when committed reviewer artifacts are unchanged. diff --git a/schemas/cloud_iam_findings.schema.json b/schemas/cloud_iam_findings.schema.json new file mode 100644 index 0000000..4ad9b4b --- /dev/null +++ b/schemas/cloud_iam_findings.schema.json @@ -0,0 +1,104 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/stacknil/telemetry-lab/schemas/cloud_iam_findings.schema.json", + "title": "Cloud IAM findings", + "description": "Schema for demos/cloud-iam-change-investigation-demo/artifacts/investigation_signals.json as reviewer-facing Cloud IAM findings.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "signal_id", + "rule_id", + "rule_name", + "severity", + "signal_time", + "actor", + "primary_event_id", + "source_ips", + "evidence_event_ids", + "evidence_events", + "attack_mappings", + "bounded_correlation_reason", + "review_scope" + ], + "properties": { + "signal_id": { "type": "string", "pattern": "^CTI-[0-9]{3}$" }, + "rule_id": { "type": "string", "minLength": 1 }, + "rule_name": { "type": "string", "minLength": 1 }, + "severity": { "type": "string", "enum": ["low", "medium", "high", "critical"] }, + "signal_time": { "type": "string", "format": "date-time" }, + "actor": { "type": "string", "minLength": 1 }, + "primary_event_id": { "type": "string", "minLength": 1 }, + "source_ips": { + "type": "array", + "minItems": 1, + "items": { "type": "string", "minLength": 1 } + }, + "evidence_event_ids": { + "type": "array", + "minItems": 1, + "items": { "type": "string", "minLength": 1 } + }, + "evidence_events": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/evidence_event" } + }, + "attack_mappings": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/attack_mapping" } + }, + "bounded_correlation_reason": { "type": "string", "minLength": 1 }, + "review_scope": { "type": "string", "minLength": 1 } + } + }, + "$defs": { + "attack_mapping": { + "type": "object", + "additionalProperties": false, + "required": ["id", "name", "tactic", "reference"], + "properties": { + "id": { "type": "string", "pattern": "^T[0-9]{4}(\\.[0-9]{3})?$" }, + "name": { "type": "string", "minLength": 1 }, + "tactic": { "type": "string", "minLength": 1 }, + "reference": { "type": "string", "format": "uri" } + } + }, + "evidence_event": { + "type": "object", + "additionalProperties": false, + "required": [ + "eventID", + "event_time", + "observed_time", + "eventTime", + "actor", + "eventSource", + "eventName", + "awsRegion", + "sourceIPAddress", + "errorCode", + "requestParameters" + ], + "properties": { + "eventID": { "type": "string", "minLength": 1 }, + "event_time": { "type": "string", "format": "date-time" }, + "observed_time": { "type": ["string", "null"], "format": "date-time" }, + "eventTime": { "type": "string", "format": "date-time" }, + "actor": { "type": "string", "minLength": 1 }, + "eventSource": { "type": "string", "minLength": 1 }, + "eventName": { "type": "string", "minLength": 1 }, + "awsRegion": { "type": "string", "minLength": 1 }, + "sourceIPAddress": { "type": "string", "minLength": 1 }, + "errorCode": { "type": ["string", "null"] }, + "requestParameters": { + "type": "object", + "additionalProperties": true + } + } + } + } +} diff --git a/schemas/cloud_iam_summary.schema.json b/schemas/cloud_iam_summary.schema.json new file mode 100644 index 0000000..e33dc01 --- /dev/null +++ b/schemas/cloud_iam_summary.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/stacknil/telemetry-lab/schemas/cloud_iam_summary.schema.json", + "title": "Cloud IAM investigation summary", + "description": "Schema for demos/cloud-iam-change-investigation-demo/artifacts/investigation_summary.json.", + "type": "object", + "additionalProperties": false, + "required": [ + "schema_version", + "source_type", + "event_count", + "signal_count", + "rule_counts", + "attack_mapping_count", + "time_model", + "boundaries" + ], + "properties": { + "schema_version": { + "type": "string", + "const": "cloud-iam-change-investigation-demo/v1" + }, + "source_type": { "type": "string", "minLength": 1 }, + "event_count": { "type": "integer", "minimum": 0 }, + "signal_count": { "type": "integer", "minimum": 0 }, + "rule_counts": { + "type": "object", + "minProperties": 1, + "additionalProperties": { + "type": "integer", + "minimum": 0 + } + }, + "attack_mapping_count": { "type": "integer", "minimum": 0 }, + "time_model": { + "type": "object", + "additionalProperties": false, + "required": [ + "event_time_source", + "observed_time_source", + "detection_ordering", + "observed_time_event_count" + ], + "properties": { + "event_time_source": { "type": "string", "const": "eventTime" }, + "observed_time_source": { "type": "string", "const": "observedTime when present" }, + "detection_ordering": { "type": "string", "const": "event_time" }, + "observed_time_event_count": { "type": "integer", "minimum": 0 } + } + }, + "boundaries": { + "type": "array", + "minItems": 1, + "items": { "type": "string", "minLength": 1 } + } + } +} diff --git a/schemas/investigation_summary.schema.json b/schemas/investigation_summary.schema.json index 21f2926..8ec7253 100644 --- a/schemas/investigation_summary.schema.json +++ b/schemas/investigation_summary.schema.json @@ -1,57 +1,38 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/stacknil/telemetry-lab/schemas/investigation_summary.schema.json", - "title": "Cloud IAM investigation summary", - "description": "Schema for demos/cloud-iam-change-investigation-demo/artifacts/investigation_summary.json.", - "type": "object", - "additionalProperties": false, - "required": [ - "schema_version", - "source_type", - "event_count", - "signal_count", - "rule_counts", - "attack_mapping_count", - "time_model", - "boundaries" - ], - "properties": { - "schema_version": { - "type": "string", - "const": "cloud-iam-change-investigation-demo/v1" - }, - "source_type": { "type": "string", "minLength": 1 }, - "event_count": { "type": "integer", "minimum": 0 }, - "signal_count": { "type": "integer", "minimum": 0 }, - "rule_counts": { - "type": "object", - "minProperties": 1, - "additionalProperties": { - "type": "integer", - "minimum": 0 - } - }, - "attack_mapping_count": { "type": "integer", "minimum": 0 }, - "time_model": { - "type": "object", - "additionalProperties": false, - "required": [ - "event_time_source", - "observed_time_source", - "detection_ordering", - "observed_time_event_count" - ], - "properties": { - "event_time_source": { "type": "string", "const": "eventTime" }, - "observed_time_source": { "type": "string", "const": "observedTime when present" }, - "detection_ordering": { "type": "string", "const": "event_time" }, - "observed_time_event_count": { "type": "integer", "minimum": 0 } - } - }, - "boundaries": { - "type": "array", - "minItems": 1, - "items": { "type": "string", "minLength": 1 } + "title": "Config-change investigation summary", + "description": "Schema for demos/config-change-investigation-demo/artifacts/investigation_summary.json.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "investigation_id", + "severity", + "target_system", + "triggering_change_id", + "summary", + "evidence_counts", + "bounded_correlation_reason" + ], + "properties": { + "investigation_id": { "type": "string", "pattern": "^CCI-[0-9]{3}$" }, + "severity": { "type": "string", "enum": ["low", "medium", "high", "critical"] }, + "target_system": { "type": "string", "minLength": 1 }, + "triggering_change_id": { "type": "string", "minLength": 1 }, + "summary": { "type": "string", "minLength": 1 }, + "evidence_counts": { + "type": "object", + "additionalProperties": false, + "required": ["policy_denials", "follow_on_events"], + "properties": { + "policy_denials": { "type": "integer", "minimum": 0 }, + "follow_on_events": { "type": "integer", "minimum": 0 } + } + }, + "bounded_correlation_reason": { "type": "string", "minLength": 1 } } } } diff --git a/schemas/telemetry_summary.schema.json b/schemas/telemetry_summary.schema.json new file mode 100644 index 0000000..4b666e5 --- /dev/null +++ b/schemas/telemetry_summary.schema.json @@ -0,0 +1,47 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/stacknil/telemetry-lab/schemas/telemetry_summary.schema.json", + "title": "Telemetry window run summary", + "description": "Schema for data/processed/summary.json and data/processed/richer_sample/summary.json.", + "type": "object", + "additionalProperties": false, + "required": [ + "input_path", + "output_dir", + "normalized_event_count", + "window_count", + "feature_row_count", + "alert_count", + "triggered_rule_names", + "triggered_rule_counts", + "cooldown_seconds", + "generated_artifacts" + ], + "properties": { + "input_path": { "type": "string", "minLength": 1 }, + "output_dir": { "type": "string", "minLength": 1 }, + "normalized_event_count": { "type": "integer", "minimum": 0 }, + "window_count": { "type": "integer", "minimum": 0 }, + "feature_row_count": { "type": "integer", "minimum": 0 }, + "alert_count": { "type": "integer", "minimum": 0 }, + "triggered_rule_names": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + }, + "triggered_rule_counts": { + "type": "object", + "additionalProperties": { + "type": "integer", + "minimum": 0 + } + }, + "cooldown_seconds": { "type": "integer", "minimum": 0 }, + "generated_artifacts": { + "type": "array", + "minItems": 1, + "items": { "type": "string", "minLength": 1 }, + "uniqueItems": true + } + } +} diff --git a/tests/test_evidence_pipeline_schemas.py b/tests/test_evidence_pipeline_schemas.py index 86ef662..a61bf3d 100644 --- a/tests/test_evidence_pipeline_schemas.py +++ b/tests/test_evidence_pipeline_schemas.py @@ -9,6 +9,10 @@ REPO_ROOT = Path(__file__).resolve().parents[1] SCHEMA_CONTRACTS = { + "schemas/telemetry_summary.schema.json": [ + "data/processed/summary.json", + "data/processed/richer_sample/summary.json", + ], "schemas/rule_hits.schema.json": [ "demos/ai-assisted-detection-demo/artifacts/rule_hits.json", ], @@ -18,14 +22,37 @@ "schemas/dedup_explanations.schema.json": [ "demos/rule-evaluation-and-dedup-demo/artifacts/dedup_explanations.json", ], - "schemas/investigation_signals.schema.json": [ + "schemas/investigation_summary.schema.json": [ + "demos/config-change-investigation-demo/artifacts/investigation_summary.json", + ], + "schemas/cloud_iam_findings.schema.json": [ "demos/cloud-iam-change-investigation-demo/artifacts/investigation_signals.json", ], - "schemas/investigation_summary.schema.json": [ + "schemas/cloud_iam_summary.schema.json": [ "demos/cloud-iam-change-investigation-demo/artifacts/investigation_summary.json", ], } +DEMO_SCHEMA_COVERAGE = { + "telemetry-window-demo": [ + "schemas/telemetry_summary.schema.json", + ], + "ai-assisted-detection-demo": [ + "schemas/rule_hits.schema.json", + "schemas/case_bundles.schema.json", + ], + "rule-evaluation-and-dedup-demo": [ + "schemas/dedup_explanations.schema.json", + ], + "config-change-investigation-demo": [ + "schemas/investigation_summary.schema.json", + ], + "cloud-iam-change-investigation-demo": [ + "schemas/cloud_iam_findings.schema.json", + "schemas/cloud_iam_summary.schema.json", + ], +} + def _load_json(relative_path: str) -> object: return json.loads((REPO_ROOT / relative_path).read_text(encoding="utf-8")) @@ -70,7 +97,34 @@ def test_evidence_pipeline_contract_docs_reference_schemas_and_artifacts() -> No for schema_path, artifact_paths in SCHEMA_CONTRACTS.items(): assert f"`{schema_path}`" in contract_doc + assert f"`{schema_path}`" in reviewer_pack assert (REPO_ROOT / schema_path).is_file(), schema_path for artifact_path in artifact_paths: assert f"`{artifact_path}`" in contract_doc assert (REPO_ROOT / artifact_path).is_file(), artifact_path + + +def test_schema_contracts_cover_all_five_demos_and_named_artifacts() -> None: + contract_schema_paths = set(SCHEMA_CONTRACTS) + + assert set(DEMO_SCHEMA_COVERAGE) == { + "telemetry-window-demo", + "ai-assisted-detection-demo", + "rule-evaluation-and-dedup-demo", + "config-change-investigation-demo", + "cloud-iam-change-investigation-demo", + } + for demo_name, schema_paths in DEMO_SCHEMA_COVERAGE.items(): + assert schema_paths, demo_name + for schema_path in schema_paths: + assert schema_path in contract_schema_paths + assert (REPO_ROOT / schema_path).is_file(), schema_path + + for required_schema in [ + "schemas/rule_hits.schema.json", + "schemas/case_bundles.schema.json", + "schemas/dedup_explanations.schema.json", + "schemas/investigation_summary.schema.json", + "schemas/cloud_iam_findings.schema.json", + ]: + assert required_schema in contract_schema_paths