From 80ccac28618f6013b53699706a3dedc58a02fcdb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 05:49:28 +0000 Subject: [PATCH 01/39] feat(spac): derive deal attempts from the 8-K event stream Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/storage/spac/spacDealGrouping.test.ts | 155 ++++++++++++++++++++++ src/storage/spac/spacDealGrouping.ts | 133 +++++++++++++++++++ 2 files changed, 288 insertions(+) create mode 100644 src/storage/spac/spacDealGrouping.test.ts create mode 100644 src/storage/spac/spacDealGrouping.ts diff --git a/src/storage/spac/spacDealGrouping.test.ts b/src/storage/spac/spacDealGrouping.test.ts new file mode 100644 index 0000000..7e37bf3 --- /dev/null +++ b/src/storage/spac/spacDealGrouping.test.ts @@ -0,0 +1,155 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "bun:test"; +import { deriveDealsFromEvents } from "./spacDealGrouping"; +import type { SpacDeal } from "./SpacDealSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; + +function ev( + event_type: SpacEventType, + event_date: string, + accession_number = `${event_date}-${event_type}` +): SpacEvent { + return { + cik: 1, + accession_number, + event_type, + event_date, + form: "8-K", + primary_document: null, + source_document_url: null, + deal_index: null, + amount: null, + shares: null, + detail: null, + confidence: null, + created_at: "2026-01-01T00:00:00.000Z", + }; +} + +function deal(p: Pick & Partial): SpacDeal { + return { + cik: 1, + target_name: null, + target_cik: null, + announced_date: null, + definitive_agreement_date: null, + proxy_date: null, + vote_date: null, + pipe_amount: null, + redemption_amount: null, + redemption_shares: null, + outcome_date: null, + source_accession: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("deriveDealsFromEvents", () => { + it("groups DA -> vote -> completion into one completed deal", () => { + const deals = deriveDealsFromEvents( + 1, + [ + ev("definitive_agreement", "2021-03-01"), + ev("vote", "2021-06-01"), + ev("completed", "2021-06-15"), + ], + [] + ); + expect(deals.length).toBe(1); + expect(deals[0].deal_index).toBe(0); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].announced_date).toBe("2021-03-01"); + expect(deals[0].definitive_agreement_date).toBe("2021-03-01"); + expect(deals[0].vote_date).toBe("2021-06-01"); + expect(deals[0].outcome_date).toBe("2021-06-15"); + }); + + it("splits a terminated attempt and a later completed attempt into two deals", () => { + const deals = deriveDealsFromEvents( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [] + ); + expect(deals.map((d) => d.deal_index)).toEqual([0, 1]); + expect(deals[0].outcome).toBe("terminated"); + expect(deals[0].outcome_date).toBe("2021-02-01"); + expect(deals[1].outcome).toBe("completed"); + expect(deals[1].announced_date).toBe("2021-05-01"); + expect(deals[1].outcome_date).toBe("2021-09-01"); + }); + + it("ignores an extension vote with no open deal", () => { + const deals = deriveDealsFromEvents(1, [ev("vote", "2021-04-01")], []); + expect(deals.length).toBe(0); + }); + + it("opens an already-completed deal when 2.01 has no preceding DA", () => { + const deals = deriveDealsFromEvents(1, [ev("completed", "2021-09-01")], []); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].announced_date).toBeNull(); + expect(deals[0].outcome_date).toBe("2021-09-01"); + }); + + it("assigns the same deal_index regardless of event insertion order", () => { + const ordered = deriveDealsFromEvents( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [] + ); + const shuffled = deriveDealsFromEvents( + 1, + [ + ev("completed", "2021-09-01"), + ev("definitive_agreement", "2021-05-01"), + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-01"), + ], + [] + ); + expect(shuffled).toEqual(ordered); + }); + + it("merge-preserves AI-enriched fields not owned by 8-K", () => { + const existing = [ + deal({ + deal_index: 0, + outcome: "pending", + target_name: "Acme Target Inc.", + target_cik: 99, + pipe_amount: 150_000_000, + proxy_date: "2021-05-20", + created_at: "2020-01-01T00:00:00.000Z", + }), + ]; + const deals = deriveDealsFromEvents( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], + existing + ); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].outcome_date).toBe("2021-06-15"); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].target_cik).toBe(99); + expect(deals[0].pipe_amount).toBe(150_000_000); + expect(deals[0].proxy_date).toBe("2021-05-20"); + expect(deals[0].created_at).toBe("2020-01-01T00:00:00.000Z"); + }); +}); diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts new file mode 100644 index 0000000..083adfe --- /dev/null +++ b/src/storage/spac/spacDealGrouping.ts @@ -0,0 +1,133 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { SpacDeal, SpacDealOutcome } from "./SpacDealSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; + +/** Event types that shape a business-combination attempt. */ +const DEAL_RELEVANT_EVENT_TYPES: readonly SpacEventType[] = [ + "definitive_agreement", + "terminated", + "completed", + "vote", +]; + +interface DealSkeleton { + deal_index: number; + announced_date: string | null; + definitive_agreement_date: string | null; + vote_date: string | null; + outcome: SpacDealOutcome; + outcome_date: string | null; + source_accession: string | null; +} + +/** + * Rebuild the full {@link SpacDeal} set for a CIK from its append-only events. + * + * Deterministic + replay-safe: events are ordered by `(event_date, + * accession_number)` and walked with a single "open deal" cursor, so the same + * event set always yields the same `deal_index` assignments. The result + * merge-preserves §4b-owned columns (`target_*`, `pipe_amount`, `redemption_*`, + * `proxy_date`) and `created_at` from any matching existing deal row. + */ +export function deriveDealsFromEvents( + cik: number, + events: readonly SpacEvent[], + existingDeals: readonly SpacDeal[] +): SpacDeal[] { + const relevant = events + .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type as SpacEventType)) + .slice() + .sort( + (a, b) => + a.event_date.localeCompare(b.event_date) || + a.accession_number.localeCompare(b.accession_number) + ); + + const skeletons: DealSkeleton[] = []; + let open: DealSkeleton | null = null; + let nextIndex = 0; + + const openNew = (e: SpacEvent): DealSkeleton => { + const d: DealSkeleton = { + deal_index: nextIndex++, + announced_date: null, + definitive_agreement_date: null, + vote_date: null, + outcome: "pending", + outcome_date: null, + source_accession: e.accession_number, + }; + skeletons.push(d); + return d; + }; + + for (const e of relevant) { + switch (e.event_type) { + case "definitive_agreement": { + if (!open) open = openNew(e); + if (open.announced_date == null) open.announced_date = e.event_date; + if ( + open.definitive_agreement_date == null || + e.event_date > open.definitive_agreement_date + ) { + open.definitive_agreement_date = e.event_date; + } + open.source_accession = e.accession_number; + break; + } + case "terminated": { + if (open) { + open.outcome = "terminated"; + open.outcome_date = e.event_date; + open.source_accession = e.accession_number; + open = null; + } + break; + } + case "completed": { + const d = open ?? openNew(e); + d.outcome = "completed"; + d.outcome_date = e.event_date; + d.source_accession = e.accession_number; + open = null; + break; + } + case "vote": { + if (open) { + if (open.vote_date == null || e.event_date > open.vote_date) { + open.vote_date = e.event_date; + } + open.source_accession = e.accession_number; + } + break; + } + } + } + + const existingByIndex = new Map(existingDeals.map((d) => [d.deal_index, d])); + return skeletons.map((s) => { + const prev = existingByIndex.get(s.deal_index); + return { + cik, + deal_index: s.deal_index, + target_name: prev?.target_name ?? null, + target_cik: prev?.target_cik ?? null, + proxy_date: prev?.proxy_date ?? null, + pipe_amount: prev?.pipe_amount ?? null, + redemption_amount: prev?.redemption_amount ?? null, + redemption_shares: prev?.redemption_shares ?? null, + announced_date: s.announced_date, + definitive_agreement_date: s.definitive_agreement_date, + vote_date: s.vote_date, + outcome: s.outcome, + outcome_date: s.outcome_date, + source_accession: s.source_accession, + created_at: prev?.created_at ?? new Date().toISOString(), + }; + }); +} From dad6d80254089aad204715f3767808e3da520ca6 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 05:52:58 +0000 Subject: [PATCH 02/39] refactor(spac): tidy deal grouping + document merge invariant --- src/storage/spac/spacDealGrouping.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts index 083adfe..79b7d92 100644 --- a/src/storage/spac/spacDealGrouping.ts +++ b/src/storage/spac/spacDealGrouping.ts @@ -30,9 +30,17 @@ interface DealSkeleton { * * Deterministic + replay-safe: events are ordered by `(event_date, * accession_number)` and walked with a single "open deal" cursor, so the same - * event set always yields the same `deal_index` assignments. The result - * merge-preserves §4b-owned columns (`target_*`, `pipe_amount`, `redemption_*`, - * `proxy_date`) and `created_at` from any matching existing deal row. + * event set always yields the same `deal_index` assignments. `source_accession` + * reflects the latest event that shaped the deal (the completion accession for a + * completed deal; the latest DA for a pending one). + * + * The result merge-preserves §4b-owned columns (`target_*`, `pipe_amount`, + * `redemption_*`, `proxy_date`) and `created_at` from any existing deal row. + * That merge binds existing rows to recomputed deals positionally by + * `deal_index`, which assumes the upstream event set stays append-only and + * stable: a back-filled earlier-dated DA that renumbers attempts would rebind + * enriched data to a different attempt — an accepted, rare property of strict + * chronological ordinals. */ export function deriveDealsFromEvents( cik: number, @@ -40,8 +48,7 @@ export function deriveDealsFromEvents( existingDeals: readonly SpacDeal[] ): SpacDeal[] { const relevant = events - .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type as SpacEventType)) - .slice() + .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type)) .sort( (a, b) => a.event_date.localeCompare(b.event_date) || From 695c0dec6540ee181c7671f2d7ba70c56be149ab Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 05:56:28 +0000 Subject: [PATCH 03/39] feat(spac): add recordDealMilestones writer method Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/storage/spac/SpacReportWriter.test.ts | 84 +++++++++++++++++++++++ src/storage/spac/SpacReportWriter.ts | 39 ++++++++++- 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/src/storage/spac/SpacReportWriter.test.ts b/src/storage/spac/SpacReportWriter.test.ts index 998fb37..69f19ed 100644 --- a/src/storage/spac/SpacReportWriter.test.ts +++ b/src/storage/spac/SpacReportWriter.test.ts @@ -179,4 +179,88 @@ describe("SpacReportWriter", () => { const row = await repo.getSpac(9); expect(JSON.parse(row!.spac_tickers!)).toEqual(["NEO.U", "NEO"]); }); + + it("rolls a registered SPAC forward through DA, vote, and completion", async () => { + await writer.recordRegistration({ + cik: 10, + accession_number: "0000-reg", + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Merge SPAC", + spac_sic: 6770, + }); + + await writer.recordDealMilestones({ + cik: 10, + accession_number: "0000-da", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + let row = await repo.getSpac(10); + expect(row?.status).toBe("deal_announced"); + expect(row?.definitive_agreement_date).toBe("2021-03-01"); + + await writer.recordDealMilestones({ + cik: 10, + accession_number: "0000-vote", + filing_date: "2021-06-02", + form: "8-K", + primary_document: null, + events: [{ event_type: "vote", event_date: "2021-06-01" }], + }); + row = await repo.getSpac(10); + expect(row?.status).toBe("proxy"); + expect(row?.vote_date).toBe("2021-06-01"); + + await writer.recordDealMilestones({ + cik: 10, + accession_number: "0000-close", + filing_date: "2021-06-16", + form: "8-K", + primary_document: null, + events: [{ event_type: "completed", event_date: "2021-06-15" }], + }); + row = await repo.getSpac(10); + expect(row?.status).toBe("completed"); + expect(row?.completed_date).toBe("2021-06-15"); + + const deals = await repo.getDeals(10); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + expect(deals[0].target_name).toBeNull(); // not available from item codes + }); + + it("is idempotent when the same milestone 8-K is reprocessed", async () => { + const call = { + cik: 11, + accession_number: "0000-da", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement" as const, event_date: "2021-03-01" }], + }; + await writer.recordDealMilestones(call); + await writer.recordDealMilestones(call); + + const events = await repo.getEvents(11); + expect(events.filter((e) => e.event_type === "definitive_agreement").length).toBe(1); + const deals = await repo.getDeals(11); + expect(deals.length).toBe(1); + }); + + it("does nothing when given no events", async () => { + await writer.recordDealMilestones({ + cik: 12, + accession_number: "0000-none", + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [], + }); + expect(await repo.getSpac(12)).toBeUndefined(); + expect(await repo.getEvents(12)).toEqual([]); + }); }); diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index 00efef4..9510878 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -7,8 +7,9 @@ import { globalServiceRegistry, uuid4 } from "workglow"; import { SpacRepo } from "./SpacRepo"; import { buildSpacRow, type SpacRowPatch } from "./spacRollup"; +import { deriveDealsFromEvents } from "./spacDealGrouping"; import type { Spac } from "./SpacSchema"; -import type { SpacEvent } from "./SpacEventSchema"; +import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; import type { SpacHistory } from "./SpacHistorySchema"; import { CHANGE_LOG_REPOSITORY_TOKEN } from "../change-tracking/ChangeLogSchema"; @@ -33,6 +34,16 @@ interface RecordIpoArgs { readonly spac_tickers: readonly string[] | null; } +interface RecordDealMilestonesArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly primary_document: string | null; + /** event_date is pre-resolved by the caller (report_date ?? filing_date). */ + readonly events: readonly { event_type: SpacEventType; event_date: string }[]; +} + /** Fields compared for ChangeLog/history; everything except the volatile timestamp. */ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ "current_cik", "status", "spac_name", "target_name", "surviving_name", "current_name", @@ -89,6 +100,32 @@ export class SpacReportWriter { }); } + /** + * Record de-SPAC milestone events mapped from 8-K item codes: append each + * event (idempotent by PK), recompute the deal set from the full event + * stream (merge-preserving §4b-owned columns), then rebuild the row. + */ + async recordDealMilestones(args: RecordDealMilestonesArgs): Promise { + if (args.events.length === 0) return; + for (const e of args.events) { + await this.appendEvent({ + cik: args.cik, + accession_number: args.accession_number, + event_type: e.event_type, + event_date: e.event_date, + form: args.form, + primary_document: args.primary_document, + }); + } + const events = await this.repo.getEvents(args.cik); + const existingDeals = await this.repo.getDeals(args.cik); + const deals = deriveDealsFromEvents(args.cik, events, existingDeals); + for (const deal of deals) { + await this.repo.saveDeal(deal); + } + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + private async appendEvent( partial: Pick & Partial From d51b229531f3555db18e731bb09334fd0f257905 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 05:59:15 +0000 Subject: [PATCH 04/39] feat(spac): map 8-K milestone item codes to lifecycle events Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../spac8kMilestones.test.ts | 36 ++++++++++++++++++ .../miscellaneous-filings/spac8kMilestones.ts | 37 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts create mode 100644 src/sec/forms/miscellaneous-filings/spac8kMilestones.ts diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts new file mode 100644 index 0000000..be6a0e3 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts @@ -0,0 +1,36 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "bun:test"; +import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; + +describe("mapItemCodesToSpacEvents", () => { + it("maps the four milestone item codes to lifecycle events", () => { + expect(mapItemCodesToSpacEvents(["1.01"], "2021-03-01")).toEqual([ + { event_type: "definitive_agreement", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["1.02"], "2021-03-01")).toEqual([ + { event_type: "terminated", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["2.01"], "2021-03-01")).toEqual([ + { event_type: "completed", event_date: "2021-03-01" }, + ]); + expect(mapItemCodesToSpacEvents(["5.07"], "2021-03-01")).toEqual([ + { event_type: "vote", event_date: "2021-03-01" }, + ]); + }); + + it("ignores non-milestone item codes", () => { + expect(mapItemCodesToSpacEvents(["2.02", "9.01", "7.01"], "2021-03-01")).toEqual([]); + }); + + it("maps only the milestone items from a mixed filing", () => { + const events = mapItemCodesToSpacEvents(["1.01", "7.01", "8.01", "9.01"], "2021-03-01"); + expect(events).toEqual([ + { event_type: "definitive_agreement", event_date: "2021-03-01" }, + ]); + }); +}); diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts new file mode 100644 index 0000000..b07eafc --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts @@ -0,0 +1,37 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { SpacEventType } from "../../../storage/spac/SpacEventSchema"; + +/** 8-K item code -> SPAC lifecycle event. Only these four items participate. */ +const ITEM_TO_SPAC_EVENT: Record = { + "1.01": "definitive_agreement", + "1.02": "terminated", + "2.01": "completed", + "5.07": "vote", +}; + +export interface SpacMilestoneEvent { + readonly event_type: SpacEventType; + readonly event_date: string; +} + +/** + * Map a filing's 8-K item codes to SPAC lifecycle events. `eventDate` is the + * triggering-event date the caller resolved (`report_date ?? filing_date`). + * Non-milestone items are dropped. + */ +export function mapItemCodesToSpacEvents( + itemCodes: readonly string[], + eventDate: string +): SpacMilestoneEvent[] { + const events: SpacMilestoneEvent[] = []; + for (const code of itemCodes) { + const event_type = ITEM_TO_SPAC_EVENT[code]; + if (event_type) events.push({ event_type, event_date: eventDate }); + } + return events; +} From edc6e7571dff82e8af9d73a42bc63a9b569a83f2 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:02:20 +0000 Subject: [PATCH 05/39] feat(spac): wire 8-K milestone mapping into processForm8K Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../miscellaneous-filings/Form_8_K.storage.ts | 20 +++++ .../spac8kMilestones.test.ts | 86 ++++++++++++++++++- 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts index f762aec..dbfc5eb 100644 --- a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts +++ b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts @@ -8,6 +8,9 @@ import { Form8KEventRepo } from "../../../storage/form-8k-event/Form8KEventRepo" import type { Form8KEvent } from "../../../storage/form-8k-event/Form8KEventSchema"; import type { Form8K } from "./Form_8_K.schema"; import { Form_8_K_ITEMS } from "./Form_8_K"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; /** * Extracts item codes from the filing metadata `items` field. @@ -76,4 +79,21 @@ export async function processForm8K({ }; await eventRepo.saveEvent(event); } + + // --- Consolidated SPAC report: map de-SPAC milestone items (known SPACs only) --- + const spacRow = await new SpacRepo().getSpac(cik); + if (spacRow) { + const eventDate = effectiveReportDate || filing_date; + const spacEvents = mapItemCodesToSpacEvents(itemCodes, eventDate); + if (spacEvents.length > 0) { + await new SpacReportWriter().recordDealMilestones({ + cik, + accession_number, + filing_date, + form, + primary_document: null, + events: spacEvents, + }); + } + } } diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts index be6a0e3..4f2e862 100644 --- a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts @@ -4,8 +4,14 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, expect, it } from "bun:test"; +import { beforeEach, describe, expect, it } from "bun:test"; import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { Form_8_K } from "./Form_8_K"; +import { processForm8K } from "./Form_8_K.storage"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; describe("mapItemCodesToSpacEvents", () => { it("maps the four milestone item codes to lifecycle events", () => { @@ -34,3 +40,81 @@ describe("mapItemCodesToSpacEvents", () => { ]); }); }); + +describe("processForm8K SPAC milestone wiring", () => { + let repo: SpacRepo; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRepo(); + }); + + async function seedSpac(cik: number): Promise { + await new SpacReportWriter().recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Test SPAC", + spac_sic: 6770, + }); + } + + async function run8K( + cik: number, + accession_number: string, + items: string, + report_date: string + ): Promise { + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik, + accession_number, + filing_date: report_date, + form: "8-K", + items, + report_date, + form8K, + }); + } + + it("advances a known SPAC through DA -> vote -> completion", async () => { + await seedSpac(100); + await run8K(100, "100-da", "1.01,9.01", "2021-03-01"); + await run8K(100, "100-vote", "5.07", "2021-06-01"); + await run8K(100, "100-close", "2.01,5.01", "2021-06-15"); + + const row = await repo.getSpac(100); + expect(row?.status).toBe("completed"); + expect(row?.definitive_agreement_date).toBe("2021-03-01"); + expect(row?.vote_date).toBe("2021-06-01"); + expect(row?.completed_date).toBe("2021-06-15"); + + const deals = await repo.getDeals(100); + expect(deals.length).toBe(1); + expect(deals[0].outcome).toBe("completed"); + }); + + it("writes no SPAC events for a CIK with no spac row", async () => { + await run8K(200, "200-da", "1.01,9.01", "2021-03-01"); + expect(await repo.getSpac(200)).toBeUndefined(); + expect(await repo.getEvents(200)).toEqual([]); + expect(await repo.getDeals(200)).toEqual([]); + }); + + it("uses report_date as the event date and is idempotent on reprocess", async () => { + await seedSpac(300); + await run8K(300, "300-da", "1.01", "2021-03-01"); + await run8K(300, "300-da", "1.01", "2021-03-01"); // reprocess + + const events = await repo.getEvents(300); + expect(events.filter((e) => e.event_type === "definitive_agreement").length).toBe(1); + expect( + events.find((e) => e.event_type === "definitive_agreement")?.event_date + ).toBe("2021-03-01"); + const deals = await repo.getDeals(300); + expect(deals.length).toBe(1); + }); +}); From 776828bf8fb20460bb9d3cdceadc65a9138da8af Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:06:23 +0000 Subject: [PATCH 06/39] test(spac): pin report_date precedence; tighten milestone mapping doc Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../spac8kMilestones.test.ts | 20 +++++++++++++++++++ .../miscellaneous-filings/spac8kMilestones.ts | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts index 4f2e862..52bc54c 100644 --- a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts @@ -117,4 +117,24 @@ describe("processForm8K SPAC milestone wiring", () => { const deals = await repo.getDeals(300); expect(deals.length).toBe(1); }); + + it("prefers report_date over filing_date for the event date", async () => { + await seedSpac(400); + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik: 400, + accession_number: "400-da", + filing_date: "2021-03-10", // later than the report/triggering date + form: "8-K", + items: "1.01", + report_date: "2021-03-01", // the actual triggering-event date + form8K, + }); + + const events = await repo.getEvents(400); + const da = events.find((e) => e.event_type === "definitive_agreement"); + expect(da?.event_date).toBe("2021-03-01"); + const deals = await repo.getDeals(400); + expect(deals[0].definitive_agreement_date).toBe("2021-03-01"); + }); }); diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts index b07eafc..2cf4bb2 100644 --- a/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.ts @@ -21,8 +21,8 @@ export interface SpacMilestoneEvent { /** * Map a filing's 8-K item codes to SPAC lifecycle events. `eventDate` is the - * triggering-event date the caller resolved (`report_date ?? filing_date`). - * Non-milestone items are dropped. + * caller's resolved triggering-event date (the 8-K period-of-report, falling + * back to the filing date). Non-milestone items are dropped. */ export function mapItemCodesToSpacEvents( itemCodes: readonly string[], From c75f4fa2277a95536dcfe91f762a033f1f4b924c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:09:25 +0000 Subject: [PATCH 07/39] fix(spac): restore SpacEventType cast for event_type membership check Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/storage/spac/spacDealGrouping.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts index 79b7d92..f9c31d0 100644 --- a/src/storage/spac/spacDealGrouping.ts +++ b/src/storage/spac/spacDealGrouping.ts @@ -48,7 +48,7 @@ export function deriveDealsFromEvents( existingDeals: readonly SpacDeal[] ): SpacDeal[] { const relevant = events - .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type)) + .filter((e) => DEAL_RELEVANT_EVENT_TYPES.includes(e.event_type as SpacEventType)) .sort( (a, b) => a.event_date.localeCompare(b.event_date) || From ea4bbbe4b8f3062946dfa02d0bc05269007a40df Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:09:54 +0000 Subject: [PATCH 08/39] docs(spac): document 8-K milestone population in CLAUDE.md Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- CLAUDE.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index d56628c..747d299 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -175,9 +175,16 @@ and rolled-up key dates. It is **derived** from two append-only tables — `spac so replays are idempotent; an `as_of` guard protects filing-sourced scalar fields from out-of-order writes, and `spac_history` + `ChangeLog` version the row. -Today only the IPO half is populated (S-1/DRS → `registration`, priced 424B1/424B4 -→ `ipo`); de-SPAC events (8-K items, S-4/proxy, redemptions, PIPE, de-registration) -are defined-but-deferred slots. +The IPO half is populated from S-1/DRS (`registration`) and priced 424B1/424B4 +(`ipo`). De-SPAC **milestone dates** are populated deterministically from 8-K +item codes (known SPACs only — a `spac` row must already exist): item `1.01` → +`definitive_agreement`, `1.02` → `terminated`, `2.01` → `completed`, `5.07` → +`vote`. These group into `spac_deal` attempts via `deriveDealsFromEvents` +(recomputed from the event stream on every write, so `deal_index` is stable +across replays) and roll up automatically. `target_name`, `pipe_amount`, and +redemption amounts stay null until the narrative/AI extractors (S-4 / DEFM14A / +425) land — 8-K item codes carry no names or amounts. Still deferred: name/SIC/ +ticker transitions and Form 25/15 de-registration. ```bash sec spac report [--format json] # consolidated report From 5cfff5cd7c75b2deb6f4166b7e4e124bb0528f19 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:12:28 +0000 Subject: [PATCH 09/39] docs(spac): refresh event-vocabulary comment for written milestones --- src/storage/spac/SpacEventSchema.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/storage/spac/SpacEventSchema.ts b/src/storage/spac/SpacEventSchema.ts index e8c4f8f..d6645ca 100644 --- a/src/storage/spac/SpacEventSchema.ts +++ b/src/storage/spac/SpacEventSchema.ts @@ -9,7 +9,12 @@ import type { ITabularStorage } from "workglow"; import { createServiceToken } from "workglow"; import { TypeNullable, TypeStringEnum } from "../../util/TypeBoxUtil"; -/** Lifecycle event vocabulary. Only `registration` and `ipo` are written today. */ +/** + * Lifecycle event vocabulary. `registration` / `ipo` come from S-1/424; the + * de-SPAC milestones `definitive_agreement` / `terminated` / `completed` / + * `vote` are written from 8-K item codes. The remaining types are reserved for + * deferred extractors (S-4/DEFM14A, Form 425, Form 25/15). + */ export const SPAC_EVENT_TYPES = [ "registration", "ipo", From 502505a3e2a08a424c4258ba5c02d7f224bebbbb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 06:20:58 +0000 Subject: [PATCH 10/39] fix(spac): guard 8-K milestone write against empty event_date Skip the milestone path when neither period-of-report nor filing_date is available, so an undated 8-K can't write junk dates onto the deal/row. Adds a regression test. --- .../miscellaneous-filings/Form_8_K.storage.ts | 6 +++++- .../spac8kMilestones.test.ts | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts index dbfc5eb..ec2c719 100644 --- a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts +++ b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts @@ -83,8 +83,12 @@ export async function processForm8K({ // --- Consolidated SPAC report: map de-SPAC milestone items (known SPACs only) --- const spacRow = await new SpacRepo().getSpac(cik); if (spacRow) { + // Skip when no usable date is available: an undated milestone (empty + // event_date) would write junk announced_date/definitive_agreement_date + // onto the deal/row. Reachable only on the best-effort path where the + // filing-metadata row is absent (report_date null, filing_date ""). const eventDate = effectiveReportDate || filing_date; - const spacEvents = mapItemCodesToSpacEvents(itemCodes, eventDate); + const spacEvents = eventDate ? mapItemCodesToSpacEvents(itemCodes, eventDate) : []; if (spacEvents.length > 0) { await new SpacReportWriter().recordDealMilestones({ cik, diff --git a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts index 52bc54c..1958ec8 100644 --- a/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts +++ b/src/sec/forms/miscellaneous-filings/spac8kMilestones.test.ts @@ -137,4 +137,23 @@ describe("processForm8K SPAC milestone wiring", () => { const deals = await repo.getDeals(400); expect(deals[0].definitive_agreement_date).toBe("2021-03-01"); }); + + it("records no milestone when neither report_date nor filing_date is available", async () => { + await seedSpac(500); + const form8K = await Form_8_K.parse("8-K", ""); + await processForm8K({ + cik: 500, + accession_number: "500-da", + filing_date: "", // best-effort path: filing-metadata row absent + form: "8-K", + items: "1.01", + report_date: null, + form8K, + }); + + // An undated 8-K must not write a milestone (empty event_date would be junk). + const events = await repo.getEvents(500); + expect(events.some((e) => e.event_type === "definitive_agreement")).toBe(false); + expect(await repo.getDeals(500)).toEqual([]); + }); }); From 783e1cb59ebf89520a2f8649235e68569ee4ad3a Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:47:15 +0000 Subject: [PATCH 11/39] feat(spac): add spac_merger_extraction table + repo --- src/config/DefaultDI.ts | 14 +++++ src/config/TestingDI.ts | 13 +++++ src/config/setupAllDatabases.ts | 2 + .../spac/SpacMergerExtractionRepo.test.ts | 55 +++++++++++++++++++ src/storage/spac/SpacMergerExtractionRepo.ts | 34 ++++++++++++ .../spac/SpacMergerExtractionSchema.ts | 48 ++++++++++++++++ 6 files changed, 166 insertions(+) create mode 100644 src/storage/spac/SpacMergerExtractionRepo.test.ts create mode 100644 src/storage/spac/SpacMergerExtractionRepo.ts create mode 100644 src/storage/spac/SpacMergerExtractionSchema.ts diff --git a/src/config/DefaultDI.ts b/src/config/DefaultDI.ts index eb41e92..8c2ae7f 100644 --- a/src/config/DefaultDI.ts +++ b/src/config/DefaultDI.ts @@ -344,6 +344,11 @@ import { SpacHistoryPrimaryKeyNames, SpacHistorySchema, } from "../storage/spac/SpacHistorySchema"; +import { + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtractionSchema, +} from "../storage/spac/SpacMergerExtractionSchema"; import { createStorage } from "./createStorage"; export const DefaultDI = () => { @@ -665,6 +670,15 @@ export const DefaultDI = () => { SPAC_HISTORY_REPOSITORY_TOKEN, createStorage("spac_history", SpacHistorySchema, SpacHistoryPrimaryKeyNames, [["cik"]]) ); + globalServiceRegistry.registerInstance( + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + createStorage( + "spac_merger_extraction", + SpacMergerExtractionSchema, + SpacMergerExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // ----- Observation / Canonical / Resolver ----- globalServiceRegistry.registerInstance( diff --git a/src/config/TestingDI.ts b/src/config/TestingDI.ts index 8391d2c..95f4002 100644 --- a/src/config/TestingDI.ts +++ b/src/config/TestingDI.ts @@ -258,6 +258,11 @@ import { SpacHistoryPrimaryKeyNames, SpacHistorySchema, } from "../storage/spac/SpacHistorySchema"; +import { + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtractionSchema, +} from "../storage/spac/SpacMergerExtractionSchema"; import { CANONICAL_COMPANY_REPOSITORY_TOKEN, CanonicalCompanyPrimaryKeyNames, @@ -493,6 +498,14 @@ export function resetDependencyInjectionsForTesting() { SPAC_HISTORY_REPOSITORY_TOKEN, new InMemoryTabularStorage(SpacHistorySchema, SpacHistoryPrimaryKeyNames, [["cik"]]) ); + globalServiceRegistry.registerInstance( + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + new InMemoryTabularStorage( + SpacMergerExtractionSchema, + SpacMergerExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // Initialize Crowdfunding repositories globalServiceRegistry.registerInstance( diff --git a/src/config/setupAllDatabases.ts b/src/config/setupAllDatabases.ts index 48f1e41..b98b631 100644 --- a/src/config/setupAllDatabases.ts +++ b/src/config/setupAllDatabases.ts @@ -54,6 +54,7 @@ import { SPAC_REPOSITORY_TOKEN } from "../storage/spac/SpacSchema"; import { SPAC_DEAL_REPOSITORY_TOKEN } from "../storage/spac/SpacDealSchema"; import { SPAC_EVENT_REPOSITORY_TOKEN } from "../storage/spac/SpacEventSchema"; import { SPAC_HISTORY_REPOSITORY_TOKEN } from "../storage/spac/SpacHistorySchema"; +import { SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN } from "../storage/spac/SpacMergerExtractionSchema"; import { CANONICAL_COMPANY_ALIAS_REPOSITORY_TOKEN, CANONICAL_PERSON_ALIAS_REPOSITORY_TOKEN, @@ -145,6 +146,7 @@ export async function setupAllDatabases(): Promise { await globalServiceRegistry.get(SPAC_DEAL_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_EVENT_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_HISTORY_REPOSITORY_TOKEN).setupDatabase(); + await globalServiceRegistry.get(SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(CIK_LAST_UPDATE_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_FACTS_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_SUBMISSIONS_REPOSITORY_TOKEN).setupDatabase(); diff --git a/src/storage/spac/SpacMergerExtractionRepo.test.ts b/src/storage/spac/SpacMergerExtractionRepo.test.ts new file mode 100644 index 0000000..26e0a30 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionRepo.test.ts @@ -0,0 +1,55 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; + +function row( + p: Partial & Pick +): SpacMergerExtraction { + return { + form: "DEFM14A", + filing_date: "2021-05-01", + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + target_name: null, + target_cik: null, + target_observation_id: null, + pipe_amount: null, + merger_consideration: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("SpacMergerExtractionRepo", () => { + let repo: SpacMergerExtractionRepo; + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacMergerExtractionRepo(); + }); + + it("round-trips a row and overwrites by accession", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "Old Co" })); + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "New Co" })); + expect((await repo.getByAccession("a1"))?.target_name).toBe("New Co"); + }); + + it("queries all extractions for a CIK", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, target_name: "T1" })); + await repo.save(row({ accession_number: "a2", cik: 5, target_name: "T2" })); + await repo.save(row({ accession_number: "b1", cik: 6, target_name: "T3" })); + const forCik = await repo.getByCik(5); + expect(forCik.map((r) => r.target_name).sort()).toEqual(["T1", "T2"]); + }); +}); diff --git a/src/storage/spac/SpacMergerExtractionRepo.ts b/src/storage/spac/SpacMergerExtractionRepo.ts new file mode 100644 index 0000000..b1ca366 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionRepo.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry } from "workglow"; +import { + SpacMergerExtraction, + SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN, + SpacMergerExtractionRepositoryStorage, +} from "./SpacMergerExtractionSchema"; + +/** Per-accession merger-proxy extraction rows. */ +export class SpacMergerExtractionRepo { + private readonly storage: SpacMergerExtractionRepositoryStorage; + + constructor(storage?: SpacMergerExtractionRepositoryStorage) { + this.storage = storage ?? globalServiceRegistry.get(SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN); + } + + async save(row: SpacMergerExtraction): Promise { + await this.storage.put(row); + } + + async getByAccession(accession_number: string): Promise { + return this.storage.get({ accession_number }); + } + + /** All extractions for a CIK (unordered). */ + async getByCik(cik: number): Promise { + return (await this.storage.query({ cik })) || []; + } +} diff --git a/src/storage/spac/SpacMergerExtractionSchema.ts b/src/storage/spac/SpacMergerExtractionSchema.ts new file mode 100644 index 0000000..3963215 --- /dev/null +++ b/src/storage/spac/SpacMergerExtractionSchema.ts @@ -0,0 +1,48 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import type { ITabularStorage } from "workglow"; +import { createServiceToken } from "workglow"; +import { TypeNullable } from "../../util/TypeBoxUtil"; + +/** + * One row per merger-proxy filing (DEFM14A/PREM14A). Current-state: a + * re-extraction overwrites by accession. `target_*` / `pipe_amount` are + * correlated onto the matching `spac_deal` by `deriveDeals`; `merger_consideration` + * stays here (report + provenance only). + */ +export const SpacMergerExtractionSchema = Type.Object({ + accession_number: Type.String({ maxLength: 25 }), + cik: Type.Integer({ minimum: 0, description: "SPAC origin CIK (filer)" }), + form: Type.String({ maxLength: 20 }), + filing_date: Type.String({ format: "date" }), + extractor_id: Type.String({ maxLength: 32 }), + extractor_version: Type.String({ maxLength: 32 }), + target_name: TypeNullable(Type.String({ maxLength: 300 })), + target_cik: TypeNullable(Type.Integer({ minimum: 0 })), + target_observation_id: TypeNullable(Type.Integer({ minimum: 0 })), + pipe_amount: TypeNullable(Type.Number()), + merger_consideration: TypeNullable(Type.String({ maxLength: 2000 })), + confidence: Type.Number(), + source_span: TypeNullable(Type.String({ maxLength: 2000 })), + model_id: TypeNullable(Type.String({ maxLength: 128 })), + created_at: Type.String({ format: "date-time" }), +}); + +export type SpacMergerExtraction = Static; + +export const SpacMergerExtractionPrimaryKeyNames = ["accession_number"] as const; +export type SpacMergerExtractionRepositoryStorage = ITabularStorage< + typeof SpacMergerExtractionSchema, + typeof SpacMergerExtractionPrimaryKeyNames, + SpacMergerExtraction +>; + +export const SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN = + createServiceToken( + "sec.storage.spacMergerExtractionRepository" + ); From 0cd3d14f31e1907a10902bdd54cf391db7e43ca0 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:48:32 +0000 Subject: [PATCH 12/39] feat(spac): correlate merger extractions + proxy events into deals --- src/storage/spac/spacDealGrouping.test.ts | 121 +++++++++++++++++----- src/storage/spac/spacDealGrouping.ts | 106 +++++++++++++------ 2 files changed, 170 insertions(+), 57 deletions(-) diff --git a/src/storage/spac/spacDealGrouping.test.ts b/src/storage/spac/spacDealGrouping.test.ts index 7e37bf3..38d8af6 100644 --- a/src/storage/spac/spacDealGrouping.test.ts +++ b/src/storage/spac/spacDealGrouping.test.ts @@ -5,9 +5,10 @@ */ import { describe, expect, it } from "bun:test"; -import { deriveDealsFromEvents } from "./spacDealGrouping"; +import { deriveDeals } from "./spacDealGrouping"; import type { SpacDeal } from "./SpacDealSchema"; import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; function ev( event_type: SpacEventType, @@ -31,6 +32,31 @@ function ev( }; } +function ext( + accession_number: string, + filing_date: string, + p: Partial = {} +): SpacMergerExtraction { + return { + accession_number, + cik: 1, + form: "DEFM14A", + filing_date, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + target_name: null, + target_cik: null, + target_observation_id: null, + pipe_amount: null, + merger_consideration: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + function deal(p: Pick & Partial): SpacDeal { return { cik: 1, @@ -50,15 +76,16 @@ function deal(p: Pick & Partial): }; } -describe("deriveDealsFromEvents", () => { +describe("deriveDeals", () => { it("groups DA -> vote -> completion into one completed deal", () => { - const deals = deriveDealsFromEvents( + const deals = deriveDeals( 1, [ ev("definitive_agreement", "2021-03-01"), ev("vote", "2021-06-01"), ev("completed", "2021-06-15"), ], + [], [] ); expect(deals.length).toBe(1); @@ -71,7 +98,7 @@ describe("deriveDealsFromEvents", () => { }); it("splits a terminated attempt and a later completed attempt into two deals", () => { - const deals = deriveDealsFromEvents( + const deals = deriveDeals( 1, [ ev("definitive_agreement", "2021-01-01"), @@ -79,6 +106,7 @@ describe("deriveDealsFromEvents", () => { ev("definitive_agreement", "2021-05-01"), ev("completed", "2021-09-01"), ], + [], [] ); expect(deals.map((d) => d.deal_index)).toEqual([0, 1]); @@ -90,12 +118,12 @@ describe("deriveDealsFromEvents", () => { }); it("ignores an extension vote with no open deal", () => { - const deals = deriveDealsFromEvents(1, [ev("vote", "2021-04-01")], []); + const deals = deriveDeals(1, [ev("vote", "2021-04-01")], [], []); expect(deals.length).toBe(0); }); it("opens an already-completed deal when 2.01 has no preceding DA", () => { - const deals = deriveDealsFromEvents(1, [ev("completed", "2021-09-01")], []); + const deals = deriveDeals(1, [ev("completed", "2021-09-01")], [], []); expect(deals.length).toBe(1); expect(deals[0].outcome).toBe("completed"); expect(deals[0].announced_date).toBeNull(); @@ -103,7 +131,7 @@ describe("deriveDealsFromEvents", () => { }); it("assigns the same deal_index regardless of event insertion order", () => { - const ordered = deriveDealsFromEvents( + const ordered = deriveDeals( 1, [ ev("definitive_agreement", "2021-01-01"), @@ -111,9 +139,10 @@ describe("deriveDealsFromEvents", () => { ev("definitive_agreement", "2021-05-01"), ev("completed", "2021-09-01"), ], + [], [] ); - const shuffled = deriveDealsFromEvents( + const shuffled = deriveDeals( 1, [ ev("completed", "2021-09-01"), @@ -121,35 +150,75 @@ describe("deriveDealsFromEvents", () => { ev("definitive_agreement", "2021-01-01"), ev("terminated", "2021-02-01"), ], + [], [] ); expect(shuffled).toEqual(ordered); }); - it("merge-preserves AI-enriched fields not owned by 8-K", () => { - const existing = [ - deal({ - deal_index: 0, - outcome: "pending", - target_name: "Acme Target Inc.", - target_cik: 99, - pipe_amount: 150_000_000, - proxy_date: "2021-05-20", - created_at: "2020-01-01T00:00:00.000Z", - }), - ]; - const deals = deriveDealsFromEvents( + it("preserves created_at from an existing deal row", () => { + const existing = [deal({ deal_index: 0, outcome: "pending", created_at: "2020-01-01T00:00:00.000Z" })]; + const deals = deriveDeals(1, [ev("definitive_agreement", "2021-03-01")], [], existing); + expect(deals[0].created_at).toBe("2020-01-01T00:00:00.000Z"); + }); + + it("derives target/pipe onto the deal whose window contains the proxy filing", () => { + const deals = deriveDeals( 1, [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], - existing + [ext("p1", "2021-05-01", { target_name: "Acme Target Inc.", pipe_amount: 150_000_000 })], + [] ); expect(deals.length).toBe(1); - expect(deals[0].outcome).toBe("completed"); - expect(deals[0].outcome_date).toBe("2021-06-15"); expect(deals[0].target_name).toBe("Acme Target Inc."); - expect(deals[0].target_cik).toBe(99); expect(deals[0].pipe_amount).toBe(150_000_000); + }); + + it("lets a definitive proxy supersede an earlier preliminary one (latest non-null wins)", () => { + const deals = deriveDeals( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], + [ + ext("prem", "2021-04-01", { form: "PREM14A", target_name: "Acme Target Inc.", pipe_amount: null }), + ext("defm", "2021-05-10", { form: "DEFM14A", target_name: "Acme Target, Inc.", pipe_amount: 200_000_000 }), + ], + [] + ); + expect(deals[0].target_name).toBe("Acme Target, Inc."); // definitive wins + expect(deals[0].pipe_amount).toBe(200_000_000); + }); + + it("leaves an extraction with no matching open deal unattached", () => { + // proxy filed before any DA event -> no deal yet + const deals = deriveDeals(1, [], [ext("p1", "2021-05-01", { target_name: "Acme" })], []); + expect(deals.length).toBe(0); + }); + + it("routes two deals' proxies to the correct deal_index", () => { + const deals = deriveDeals( + 1, + [ + ev("definitive_agreement", "2021-01-01"), + ev("terminated", "2021-02-15"), + ev("definitive_agreement", "2021-05-01"), + ev("completed", "2021-09-01"), + ], + [ + ext("p0", "2021-01-20", { target_name: "First Target" }), + ext("p1", "2021-06-01", { target_name: "Second Target" }), + ], + [] + ); + expect(deals.map((d) => d.target_name)).toEqual(["First Target", "Second Target"]); + }); + + it("sets proxy_date from a proxy event on the open deal", () => { + const deals = deriveDeals( + 1, + [ev("definitive_agreement", "2021-03-01"), ev("proxy", "2021-05-20")], + [], + [] + ); expect(deals[0].proxy_date).toBe("2021-05-20"); - expect(deals[0].created_at).toBe("2020-01-01T00:00:00.000Z"); }); }); diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts index f9c31d0..495e08f 100644 --- a/src/storage/spac/spacDealGrouping.ts +++ b/src/storage/spac/spacDealGrouping.ts @@ -6,6 +6,7 @@ import type { SpacDeal, SpacDealOutcome } from "./SpacDealSchema"; import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; +import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; /** Event types that shape a business-combination attempt. */ const DEAL_RELEVANT_EVENT_TYPES: readonly SpacEventType[] = [ @@ -13,38 +14,45 @@ const DEAL_RELEVANT_EVENT_TYPES: readonly SpacEventType[] = [ "terminated", "completed", "vote", + "proxy", ]; interface DealSkeleton { deal_index: number; announced_date: string | null; definitive_agreement_date: string | null; + proxy_date: string | null; vote_date: string | null; outcome: SpacDealOutcome; outcome_date: string | null; source_accession: string | null; + // §4b columns, derived by correlating merger extractions (below). + target_name: string | null; + target_cik: number | null; + pipe_amount: number | null; } /** - * Rebuild the full {@link SpacDeal} set for a CIK from its append-only events. + * Rebuild the full {@link SpacDeal} set for a CIK from its append-only events + * and merger-proxy extractions. * * Deterministic + replay-safe: events are ordered by `(event_date, * accession_number)` and walked with a single "open deal" cursor, so the same * event set always yields the same `deal_index` assignments. `source_accession` - * reflects the latest event that shaped the deal (the completion accession for a - * completed deal; the latest DA for a pending one). + * reflects the latest event that shaped the deal. 8-K-owned columns + * (`announced_date`, `definitive_agreement_date`, `vote_date`, `outcome`, + * `outcome_date`) and `proxy_date` come from the event walk. * - * The result merge-preserves §4b-owned columns (`target_*`, `pipe_amount`, - * `redemption_*`, `proxy_date`) and `created_at` from any existing deal row. - * That merge binds existing rows to recomputed deals positionally by - * `deal_index`, which assumes the upstream event set stays append-only and - * stable: a back-filled earlier-dated DA that renumbers attempts would rebind - * enriched data to a different attempt — an accepted, rare property of strict - * chronological ordinals. + * §4b-owned columns (`target_name`, `target_cik`, `pipe_amount`) are **derived** + * by correlating each {@link SpacMergerExtraction} to the deal whose + * `[announced, closed)` window contains the proxy's `filing_date` (definitive + * supersedes preliminary; latest non-null wins). Redemption actuals are deferred + * (post-vote 8-K) and stay null. `created_at` is preserved from any existing row. */ -export function deriveDealsFromEvents( +export function deriveDeals( cik: number, events: readonly SpacEvent[], + mergerExtractions: readonly SpacMergerExtraction[], existingDeals: readonly SpacDeal[] ): SpacDeal[] { const relevant = events @@ -64,10 +72,14 @@ export function deriveDealsFromEvents( deal_index: nextIndex++, announced_date: null, definitive_agreement_date: null, + proxy_date: null, vote_date: null, outcome: "pending", outcome_date: null, source_accession: e.accession_number, + target_name: null, + target_cik: null, + pipe_amount: null, }; skeletons.push(d); return d; @@ -113,28 +125,60 @@ export function deriveDealsFromEvents( } break; } + case "proxy": { + if (open) { + if (open.proxy_date == null || e.event_date > open.proxy_date) { + open.proxy_date = e.event_date; + } + open.source_accession = e.accession_number; + } + // No open deal -> proxy with no announced deal: timeline-only. + break; + } + } + } + + // --- Correlate merger extractions onto deals by filing-date window --- + // A deal owns [lower, upper): lower = its announced/DA date, upper = its + // outcome_date else the next deal's announced date else open-ended. + for (let i = 0; i < skeletons.length; i++) { + const d = skeletons[i]; + const lower = d.announced_date ?? d.definitive_agreement_date ?? null; + const upper = d.outcome_date ?? skeletons[i + 1]?.announced_date ?? null; + const matched = mergerExtractions + .filter( + (m) => + (lower == null || m.filing_date >= lower) && (upper == null || m.filing_date < upper) + ) + .sort((a, b) => a.filing_date.localeCompare(b.filing_date)); + // Latest non-null wins per field; earlier non-nulls survive when later is null. + for (const m of matched) { + if (m.target_name != null) d.target_name = m.target_name; + if (m.target_cik != null) d.target_cik = m.target_cik; + if (m.pipe_amount != null) d.pipe_amount = m.pipe_amount; } } const existingByIndex = new Map(existingDeals.map((d) => [d.deal_index, d])); - return skeletons.map((s) => { - const prev = existingByIndex.get(s.deal_index); - return { - cik, - deal_index: s.deal_index, - target_name: prev?.target_name ?? null, - target_cik: prev?.target_cik ?? null, - proxy_date: prev?.proxy_date ?? null, - pipe_amount: prev?.pipe_amount ?? null, - redemption_amount: prev?.redemption_amount ?? null, - redemption_shares: prev?.redemption_shares ?? null, - announced_date: s.announced_date, - definitive_agreement_date: s.definitive_agreement_date, - vote_date: s.vote_date, - outcome: s.outcome, - outcome_date: s.outcome_date, - source_accession: s.source_accession, - created_at: prev?.created_at ?? new Date().toISOString(), - }; - }); + return skeletons.map((s) => ({ + cik, + deal_index: s.deal_index, + // §4b columns: derived from correlated extractions (no merge-preserve). + target_name: s.target_name, + target_cik: s.target_cik, + pipe_amount: s.pipe_amount, + // proxy_date: derived from the proxy event in the walk. + proxy_date: s.proxy_date, + // redemption actuals: deferred (post-vote 8-K) — no source yet. + redemption_amount: null, + redemption_shares: null, + // 8-K-owned columns: + announced_date: s.announced_date, + definitive_agreement_date: s.definitive_agreement_date, + vote_date: s.vote_date, + outcome: s.outcome, + outcome_date: s.outcome_date, + source_accession: s.source_accession, + created_at: existingByIndex.get(s.deal_index)?.created_at ?? new Date().toISOString(), + })); } From 302320ab9aea20dbeede83362fc01bef7637cf64 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:50:02 +0000 Subject: [PATCH 13/39] feat(spac): recordMergerProxy + thread extractions through deal recompute --- src/storage/spac/SpacReportWriter.test.ts | 65 +++++++++++++++++++++++ src/storage/spac/SpacReportWriter.ts | 55 ++++++++++++++++--- src/storage/spac/spacDealGrouping.test.ts | 4 +- 3 files changed, 117 insertions(+), 7 deletions(-) diff --git a/src/storage/spac/SpacReportWriter.test.ts b/src/storage/spac/SpacReportWriter.test.ts index 69f19ed..5714c99 100644 --- a/src/storage/spac/SpacReportWriter.test.ts +++ b/src/storage/spac/SpacReportWriter.test.ts @@ -10,6 +10,7 @@ import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; import { setupAllDatabases } from "../../config/setupAllDatabases"; import { SpacRepo } from "./SpacRepo"; import { SpacReportWriter } from "./SpacReportWriter"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; import { CHANGE_LOG_REPOSITORY_TOKEN } from "../change-tracking/ChangeLogSchema"; describe("SpacReportWriter", () => { @@ -263,4 +264,68 @@ describe("SpacReportWriter", () => { expect(await repo.getSpac(12)).toBeUndefined(); expect(await repo.getEvents(12)).toEqual([]); }); + + it("derives target/pipe + proxy from a recorded merger proxy and rolls up", async () => { + await writer.recordRegistration({ + cik: 20, accession_number: "20-reg", filing_date: "2020-12-01", form: "S-1", + primary_document: "s1.htm", spac_name: "Merge SPAC", spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik: 20, accession_number: "20-da", filing_date: "2021-03-05", form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + + await new SpacMergerExtractionRepo().save({ + accession_number: "20-defm", cik: 20, form: "DEFM14A", filing_date: "2021-05-01", + extractor_id: "merger-proxy", extractor_version: "1.0.0", + target_name: "Acme Target Inc.", target_cik: 999, target_observation_id: 1, + pipe_amount: 150_000_000, merger_consideration: "$10.00 per share in stock", + confidence: 0.95, source_span: "merger with Acme Target Inc.", model_id: "claude-sonnet-4-6", + created_at: new Date().toISOString(), + }); + await writer.recordMergerProxy({ + cik: 20, accession_number: "20-defm", filing_date: "2021-05-01", + form: "DEFM14A", primary_document: "defm.htm", emitProxyEvent: true, + }); + + const row = await repo.getSpac(20); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + expect(row?.pipe_amount).toBe(150_000_000); + expect(row?.proxy_date).toBe("2021-05-01"); + + const deals = await repo.getDeals(20); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].target_cik).toBe(999); + }); + + it("does not emit a proxy event for a preliminary proxy (PREM14A)", async () => { + await writer.recordRegistration({ + cik: 21, accession_number: "21-reg", filing_date: "2020-12-01", form: "S-1", + primary_document: "s1.htm", spac_name: "Merge SPAC", spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik: 21, accession_number: "21-da", filing_date: "2021-03-05", form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + await new SpacMergerExtractionRepo().save({ + accession_number: "21-prem", cik: 21, form: "PREM14A", filing_date: "2021-04-01", + extractor_id: "merger-proxy", extractor_version: "1.0.0", + target_name: "Acme Target Inc.", target_cik: null, target_observation_id: null, + pipe_amount: null, merger_consideration: null, confidence: 0.9, source_span: null, + model_id: null, created_at: new Date().toISOString(), + }); + await writer.recordMergerProxy({ + cik: 21, accession_number: "21-prem", filing_date: "2021-04-01", + form: "PREM14A", primary_document: "prem.htm", emitProxyEvent: false, + }); + + const events = await repo.getEvents(21); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(21); + expect(row?.target_name).toBe("Acme Target Inc."); // still correlated + expect(row?.status).toBe("deal_announced"); // no proxy event -> not "proxy" + }); }); diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index 9510878..5003d49 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -7,7 +7,8 @@ import { globalServiceRegistry, uuid4 } from "workglow"; import { SpacRepo } from "./SpacRepo"; import { buildSpacRow, type SpacRowPatch } from "./spacRollup"; -import { deriveDealsFromEvents } from "./spacDealGrouping"; +import { deriveDeals } from "./spacDealGrouping"; +import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; import type { Spac } from "./SpacSchema"; import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; import type { SpacHistory } from "./SpacHistorySchema"; @@ -44,6 +45,16 @@ interface RecordDealMilestonesArgs { readonly events: readonly { event_type: SpacEventType; event_date: string }[]; } +interface RecordMergerProxyArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly primary_document: string | null; + /** true for DEFM14A (definitive); false for PREM14A (preliminary). */ + readonly emitProxyEvent: boolean; +} + /** Fields compared for ChangeLog/history; everything except the volatile timestamp. */ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ "current_cik", "status", "spac_name", "target_name", "surviving_name", "current_name", @@ -60,6 +71,7 @@ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ */ export class SpacReportWriter { private readonly repo: SpacRepo; + private readonly mergerExtractions = new SpacMergerExtractionRepo(); constructor(repo: SpacRepo = new SpacRepo()) { this.repo = repo; @@ -117,15 +129,46 @@ export class SpacReportWriter { primary_document: args.primary_document, }); } - const events = await this.repo.getEvents(args.cik); - const existingDeals = await this.repo.getDeals(args.cik); - const deals = deriveDealsFromEvents(args.cik, events, existingDeals); - for (const deal of deals) { - await this.repo.saveDeal(deal); + await this.recomputeAndSaveDeals(args.cik); + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + + /** + * Record a merger proxy: emit a `proxy` event for the definitive proxy + * (DEFM14A), recompute deals from the event stream + stored merger + * extractions (correlation derives target/pipe), then rebuild the row. The + * extraction itself is persisted by the caller (`processMergerProxy`) before + * this runs. + */ + async recordMergerProxy(args: RecordMergerProxyArgs): Promise { + if (args.emitProxyEvent) { + await this.appendEvent({ + cik: args.cik, + accession_number: args.accession_number, + event_type: "proxy", + event_date: args.filing_date, + form: args.form, + primary_document: args.primary_document, + }); } + await this.recomputeAndSaveDeals(args.cik); await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); } + /** + * Rebuild the deal set from the CIK's full event stream + merger extractions + * (the single derivation path shared by the 8-K and merger-proxy writers). + */ + private async recomputeAndSaveDeals(cik: number): Promise { + const [events, extractions, existingDeals] = await Promise.all([ + this.repo.getEvents(cik), + this.mergerExtractions.getByCik(cik), + this.repo.getDeals(cik), + ]); + const deals = deriveDeals(cik, events, extractions, existingDeals); + for (const deal of deals) await this.repo.saveDeal(deal); + } + private async appendEvent( partial: Pick & Partial diff --git a/src/storage/spac/spacDealGrouping.test.ts b/src/storage/spac/spacDealGrouping.test.ts index 38d8af6..dbaeace 100644 --- a/src/storage/spac/spacDealGrouping.test.ts +++ b/src/storage/spac/spacDealGrouping.test.ts @@ -153,7 +153,9 @@ describe("deriveDeals", () => { [], [] ); - expect(shuffled).toEqual(ordered); + // created_at is a wall-clock stamp for new rows; compare the derived fields. + const strip = (ds: typeof ordered) => ds.map(({ created_at, ...rest }) => rest); + expect(strip(shuffled)).toEqual(strip(ordered)); }); it("preserves created_at from an existing deal row", () => { From 1de72a2a1040bc6ddc313ce75047e28ae2bf92b3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:52:33 +0000 Subject: [PATCH 14/39] feat(spac): merger-deal extractor + merger-proxy section headings --- .../s1/DocumentSegmenter.ts | 14 ++++++++ .../s1/mergerDealSchema.ts | 23 +++++++++++++ .../s1/sectionExtractors.test.ts | 32 +++++++++++++++++++ .../s1/sectionExtractors.ts | 19 +++++++++++ 4 files changed, 88 insertions(+) create mode 100644 src/sec/forms/registration-statements/s1/mergerDealSchema.ts diff --git a/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts b/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts index 8c1430a..7182859 100644 --- a/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts +++ b/src/sec/forms/registration-statements/s1/DocumentSegmenter.ts @@ -15,6 +15,10 @@ export const S1_SECTIONS = { UNDERWRITING: "Underwriting", USE_OF_PROCEEDS: "Use of Proceeds", THE_SPONSOR: "The Sponsor", + // Merger-proxy (DEFM14A/PREM14A) sections; read by the merger-proxy extractor. + THE_MERGER: "The Merger", + BUSINESS_COMBINATION: "The Business Combination", + PIPE_FINANCING: "PIPE Financing", } as const; export type S1SectionName = (typeof S1_SECTIONS)[keyof typeof S1_SECTIONS]; @@ -51,6 +55,16 @@ export const SECTION_HEADING_PATTERNS: Readonly + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import { TypeNullable } from "../../../../util/TypeBoxUtil"; + +/** The single merger-deal object the model returns from a merger proxy. */ +export const MergerDealOutputSchema = Type.Object({ + target_name: TypeNullable( + Type.String({ description: "Operating company the SPAC will merge with" }) + ), + pipe_amount: TypeNullable(Type.Number({ description: "Total PIPE investment in dollars" })), + merger_consideration: TypeNullable( + Type.String({ description: "Short verbatim description of the consideration" }) + ), + confidence: Type.Number({ minimum: 0, maximum: 1 }), + source_span: TypeNullable(Type.String()), +}); + +export type MergerDealRow = Static; diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts index d8e4e34..232bb8b 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.test.ts @@ -13,6 +13,7 @@ import { extractOfferingTerms, extractUnderwriters, extractUseOfProceeds, + extractMergerDeal, } from "./sectionExtractors"; import { fakeS1Model, registerFakeStructuredProvider } from "./testing/fakeStructuredProvider"; @@ -131,6 +132,37 @@ it("extractOfferingTerms returns the parsed offering object", async () => { } }); +it("extractMergerDeal returns the parsed merger object", async () => { + const { unregister } = registerFakeStructuredProvider([ + { + target_name: "Acme Target Inc.", + pipe_amount: 150000000, + merger_consideration: "$10.00 per share in stock", + confidence: 0.92, + source_span: "merger with Acme Target Inc.", + }, + ]); + try { + const got = await extractMergerDeal("THE MERGER ...", fakeS1Model()); + expect(got?.target_name).toBe("Acme Target Inc."); + expect(got?.pipe_amount).toBe(150000000); + } finally { + unregister(); + } +}); + +it("extractMergerDeal throws on schema-invalid model output (caller dead-letters it)", async () => { + // Missing the required `confidence` field -> schema validation rejects it. + const { unregister } = registerFakeStructuredProvider([ + { target_name: "Acme Target Inc.", pipe_amount: null, merger_consideration: null }, + ]); + try { + await expect(extractMergerDeal("THE MERGER ...", fakeS1Model())).rejects.toThrow(); + } finally { + unregister(); + } +}); + it("extractOfferingTerms throws on schema-invalid model output (caller dead-letters it)", async () => { const { unregister } = registerFakeStructuredProvider([{ tickers: [] }]); try { diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.ts index bde0e20..c974870 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.ts @@ -18,6 +18,7 @@ import { SpacSponsorOutputSchema, type SpacSponsorRow } from "./spacSponsorSchem import { OfferingTermsOutputSchema, type OfferingTermsRow } from "./offeringTermsSchema"; import { UnderwriterOutputSchema, type UnderwriterRowOut } from "./underwriterSchema"; import { UseOfProceedsOutputSchema, type UseOfProceedsLineRow } from "./useOfProceedsSchema"; +import { MergerDealOutputSchema, type MergerDealRow } from "./mergerDealSchema"; const MAX_TOKENS = 4096; @@ -211,6 +212,24 @@ export async function extractSpacSponsors( return (obj.sponsors as SpacSponsorRow[] | undefined) ?? []; } +export async function extractMergerDeal( + sectionText: string, + model: ModelConfig +): Promise { + const instructions = + "The text between the tags below is from a SPAC merger proxy (DEFM14A/PREM14A). " + + "Identify the business-combination target and deal terms. Give target_name (the " + + "operating company the SPAC will merge with), pipe_amount (the total PIPE " + + "investment in dollars, or null), merger_consideration (a short verbatim phrase " + + "describing the consideration — e.g. cash, stock, exchange ratio — or null), a " + + "confidence in [0,1], and the verbatim source_span you drew the target from. " + + "Return JSON matching the schema."; + const prompt = `${UNTRUSTED_PREAMBLE}\n\n${instructions}\n\n${wrapUntrusted(sectionText)}`; + const obj = await runStructured(model, prompt, MergerDealOutputSchema); + if (obj.confidence == null || obj.source_span == null) return null; + return obj as unknown as MergerDealRow; +} + export async function extractUseOfProceeds( sectionText: string, model: ModelConfig From 56366531007cf19d748378a18663d6e4bcd8c787 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:55:34 +0000 Subject: [PATCH 15/39] feat(spac): processMergerProxy + DEFM14A/PREM14A parse + dispatch wiring --- .../Form_DEFM14A.storage.ts | 187 ++++++++++++++++++ .../Form_DEFM14A.ts | 6 + .../Form_PREM14A.ts | 6 + .../registration-statements/s1/mergerModel.ts | 31 +++ src/storage/versioning/extractorIds.test.ts | 7 + src/storage/versioning/extractorIds.ts | 3 + src/task/forms/ProcessAccessionDocFormTask.ts | 5 + 7 files changed, 245 insertions(+) create mode 100644 src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts create mode 100644 src/sec/forms/registration-statements/s1/mergerModel.ts diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts new file mode 100644 index 0000000..bc6ec43 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry, type ModelConfig } from "workglow"; +import { buildEntityObserver } from "../../../resolver/buildEntityObserver"; +import { CanonicalCompanyRepo } from "../../../storage/canonical/CanonicalCompanyRepo"; +import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../../storage/versioning/ComponentVersionSchema"; +import { VersionRegistry } from "../../../storage/versioning/VersionRegistry"; +import { getActiveSlot } from "../../../storage/versioning/getActiveSlot"; +import { ObservationProvenanceRepo } from "../../../storage/provenance/ObservationProvenanceRepo"; +import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacMergerExtractionRepo } from "../../../storage/spac/SpacMergerExtractionRepo"; +import { parseEdgarHtml } from "../../html/parseEdgarHtml"; +import { DocumentTreeSegmenter } from "../registration-statements/s1/DocumentTreeSegmenter"; +import { S1_SECTIONS, type S1SectionName } from "../registration-statements/s1/DocumentSegmenter"; +import { makeRunSection } from "../registration-statements/s1/sectionRunner"; +import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan"; +import { extractMergerDeal } from "../registration-statements/s1/sectionExtractors"; +import type { MergerDealRow } from "../registration-statements/s1/mergerDealSchema"; +import { getMergerProxyModel, resolveModelId } from "../registration-statements/s1/mergerModel"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; + +const EXTRACTOR_ID = "merger-proxy"; +const DEFAULT_EXTRACTOR_VERSION = "1.0.0"; +const MERGER_SECTION = "merger"; +/** Definitive proxies emit a `proxy` lifecycle event; preliminary ones do not. */ +const DEFINITIVE_PROXY_FORMS = new Set(["DEFM14A"]); + +export interface ProcessMergerProxyArgs { + readonly cik: number; + readonly file_number: string; + readonly accession_number: string; + readonly filing_date: string; + readonly primary_doc: string; + readonly form: string; + readonly formMergerProxy: FormS1Parsed; + readonly model?: ModelConfig; +} + +/** + * Extract the deal identity + PIPE from a SPAC merger proxy (DEFM14A/PREM14A). + * Gated on a known SPAC. Persists a `spac_merger_extraction` row, observes the + * target company, then records the proxy event and recomputes deals (correlation + * derives target/pipe onto the matching `spac_deal`). Degrades gracefully: when + * the merger section is absent or low-confidence, it dead-letters and still emits + * the proxy event so `proxy_date` advances. + */ +export async function processMergerProxy(args: ProcessMergerProxyArgs): Promise { + const { cik, accession_number, form, filing_date, formMergerProxy } = args; + + // Gate: known SPACs only (the proxy filer is always the SPAC). + const spacRow = await new SpacRepo().getSpac(cik); + if (!spacRow) return; + + const versionRegistry = new VersionRegistry( + globalServiceRegistry.get(COMPONENT_VERSION_REPOSITORY_TOKEN) + ); + const [extractorSlot, personSlot, companySlot] = await Promise.all([ + getActiveSlot(versionRegistry, "extractor", EXTRACTOR_ID), + getActiveSlot(versionRegistry, "resolver", "person"), + getActiveSlot(versionRegistry, "resolver", "company"), + ]); + const extractor_version = extractorSlot?.semver ?? DEFAULT_EXTRACTOR_VERSION; + const observer = buildEntityObserver({ + activeResolverPersonVersion: personSlot?.semver ?? "1.0.0", + activeResolverCompanyVersion: companySlot?.semver ?? "1.0.0", + }); + const provenance = new ObservationProvenanceRepo(); + const deadLetters = new ExtractionDeadLetterRepo(); + const model = args.model ?? (await getMergerProxyModel()); + const model_id = resolveModelId(model); + + // Segment; PARSE_ERROR dead-letters the merger section so a retry can resolve it. + let byName: Map; + try { + const doc = parseEdgarHtml(formMergerProxy.html, `${form} ${accession_number}`); + const sections = new DocumentTreeSegmenter().segment(doc); + byName = new Map(sections.map((s) => [s.name, s.text])); + } catch (err) { + await deadLetters.record({ + extractor_id: EXTRACTOR_ID, + accession_number, + section_name: MERGER_SECTION, + reason_code: "PARSE_ERROR", + detail: err instanceof Error ? err.message : String(err), + failed_extractor_version: extractor_version, + source_run_id: null, + }); + return; + } + + // Prefer the merger / business-combination / PIPE sections; concatenate when + // multiple are present. (No whole-document fallback: proxies are huge.) + const mergerText = [ + byName.get(S1_SECTIONS.THE_MERGER), + byName.get(S1_SECTIONS.BUSINESS_COMBINATION), + byName.get(S1_SECTIONS.PIPE_FINANCING), + ] + .filter((t): t is string => typeof t === "string") + .join("\n\n"); + + const runSection = makeRunSection({ + deadLetters, + extractor_id: EXTRACTOR_ID, + extractor_version, + accession_number, + }); + let idx = 0; + + await runSection({ + sectionName: MERGER_SECTION, + text: mergerText === "" ? undefined : mergerText, + notFoundDetail: "no merger / business-combination / PIPE section text", + emptyDetail: "no merger deal returned", + lowConfidenceDetail: "below confidence floor", + verifyRow: (text, r) => spanAppearsIn(text, r.source_span), + unverifiedAllDetail: "merger deal source_span not present in section text", + extract: async (text) => { + const deal = await extractMergerDeal(text, model); + return deal === null ? [] : [deal]; + }, + persist: async (rows) => { + const deal = rows[0]; + const now = new Date().toISOString(); + let target_observation_id: number | null = null; + let target_cik: number | null = null; + const targetName = deal.target_name?.trim() ?? ""; + if (targetName !== "") { + const { observation_id, canonical_company_id } = await observer.observeCompany({ + accession_number, + extractor_id: EXTRACTOR_ID, + extractor_version, + observation_index: idx++, + name: targetName, + source_context: JSON.stringify({ relation: "merger-proxy:target" }), + }); + target_observation_id = observation_id; + // target_cik only when the resolved canonical company already carries one. + const canon = await new CanonicalCompanyRepo().getById(canonical_company_id); + target_cik = canon?.cik ?? null; + await provenance.save({ + kind: "company", + observation_id, + confidence: deal.confidence, + source_span: deal.source_span, + section_name: MERGER_SECTION, + model_id, + prompt_version: extractor_version, + extra: null, + }); + } + await new SpacMergerExtractionRepo().save({ + accession_number, + cik, + form, + filing_date, + extractor_id: EXTRACTOR_ID, + extractor_version, + target_name: targetName === "" ? null : targetName, + target_cik, + target_observation_id, + pipe_amount: deal.pipe_amount, + merger_consideration: deal.merger_consideration, + confidence: deal.confidence, + source_span: deal.source_span, + model_id, + created_at: now, + }); + return 1; + }, + }); + + // Emit the proxy event (definitive only) + recompute/correlate + rebuild. + await new SpacReportWriter().recordMergerProxy({ + cik, + accession_number, + filing_date, + form, + primary_document: args.primary_doc ?? null, + emitProxyEvent: DEFINITIVE_PROXY_FORMS.has(form), + }); +} diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts index efcfbaf..74e188f 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFM14A extends Form { static readonly name = "Definitive Proxy Statement for Merger or Acquisition"; static readonly description = "Provides official notification to designated classes of shareholders of matters relating to a merger or acquisition."; static readonly forms = ["DEFM14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PREM14A.ts b/src/sec/forms/proxies-information-statements/Form_PREM14A.ts index 2783f88..cf674d4 100644 --- a/src/sec/forms/proxies-information-statements/Form_PREM14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_PREM14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PREM14A extends Form { static readonly name = "Preliminary Proxy Statement for Merger or Acquisition"; static readonly description = "A preliminary proxy statement relating to a merger or acquisition."; static readonly forms = ["PREM14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/registration-statements/s1/mergerModel.ts b/src/sec/forms/registration-statements/s1/mergerModel.ts new file mode 100644 index 0000000..9d04ec0 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/mergerModel.ts @@ -0,0 +1,31 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelConfig } from "workglow"; +import { getGlobalModelRepository } from "workglow"; +import { resolveModelId } from "./s1Model"; + +export { resolveModelId }; + +const DEFAULT_MERGER_PROXY_MODEL = "claude-sonnet-4-6"; + +/** The model id used for merger-proxy extraction; overridable via SEC_MERGER_PROXY_MODEL. */ +export function getMergerProxyModelId(): string { + const id = (process.env.SEC_MERGER_PROXY_MODEL ?? "").trim(); + return id === "" ? DEFAULT_MERGER_PROXY_MODEL : id; +} + +/** Resolves the configured merger-proxy model into a ModelConfig. */ +export async function getMergerProxyModel(): Promise { + const id = getMergerProxyModelId(); + const record = await getGlobalModelRepository().findByName(id); + if (!record) { + throw new Error( + `Merger-proxy model '${id}' is not registered. Register it or set SEC_MERGER_PROXY_MODEL to a known model id.` + ); + } + return record as ModelConfig; +} diff --git a/src/storage/versioning/extractorIds.test.ts b/src/storage/versioning/extractorIds.test.ts index a1f8bf1..3c410d7 100644 --- a/src/storage/versioning/extractorIds.test.ts +++ b/src/storage/versioning/extractorIds.test.ts @@ -25,9 +25,16 @@ describe("extractorIds", () => { "CFPORTAL", "D", "S-1", + "merger-proxy", ]); }); + it("maps the merger proxies to extractor id 'merger-proxy'", () => { + for (const form of ["DEFM14A", "PREM14A"]) { + expect(formToExtractorId(form)).toBe("merger-proxy"); + } + }); + it("maps the 424 prospectus variants to extractor id '424'", () => { for (const form of ["424A", "424B1", "424B2", "424B3", "424B4", "424B5", "424B7"]) { expect(formToExtractorId(form)).toBe("424"); diff --git a/src/storage/versioning/extractorIds.ts b/src/storage/versioning/extractorIds.ts index 28c1072..6f605e0 100644 --- a/src/storage/versioning/extractorIds.ts +++ b/src/storage/versioning/extractorIds.ts @@ -18,6 +18,7 @@ export const EXTRACTOR_IDS = [ "S-1", "424", "8-K", + "merger-proxy", ] as const; export type ExtractorId = (typeof EXTRACTOR_IDS)[number]; @@ -77,6 +78,8 @@ export const FORM_TO_EXTRACTOR_ID: Readonly> = { "424B7": "424", "8-K": "8-K", "8-K/A": "8-K", + DEFM14A: "merger-proxy", + PREM14A: "merger-proxy", }; export function formToExtractorId(form: string): ExtractorId | undefined { diff --git a/src/task/forms/ProcessAccessionDocFormTask.ts b/src/task/forms/ProcessAccessionDocFormTask.ts index 750baac..335595a 100644 --- a/src/task/forms/ProcessAccessionDocFormTask.ts +++ b/src/task/forms/ProcessAccessionDocFormTask.ts @@ -25,6 +25,7 @@ import { processForm144 } from "../../sec/forms/insider-trading/Form_144.storage import { processFormS1 } from "../../sec/forms/registration-statements/Form_S_1.storage"; import { processForm424 } from "../../sec/forms/registration-statements/Form_424.storage"; import { processForm8K } from "../../sec/forms/miscellaneous-filings/Form_8_K.storage"; +import { processMergerProxy } from "../../sec/forms/proxies-information-statements/Form_DEFM14A.storage"; import { TypeSecCik } from "../../sec/submissions/EnititySubmissionSchema"; import { ExtractionDeadLetterRepo } from "../../storage/dead-letter/ExtractionDeadLetterRepo"; import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; @@ -357,6 +358,10 @@ export class ProcessAccessionDocFormTask extends Task< case "8-K/A": await processForm8K({ ...storageArgs, form: form!, items, report_date, form8K: parsed }); break; + case "DEFM14A": + case "PREM14A": + await processMergerProxy({ ...storageArgs, form: form!, formMergerProxy: parsed }); + break; default: throw new TaskError(`Form '${form}' has no storage handler`); } From 754cc1f4c664142988d0c18814e782158334bc27 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:57:29 +0000 Subject: [PATCH 16/39] test(spac): end-to-end DEFM14A merger-proxy extraction fixture --- .../Form_DEFM14A.storage.e2e.test.ts | 130 ++++++++++++++++++ .../mock_data/merger-proxy/SOURCES.md | 21 +++ .../mock_data/merger-proxy/defm14a_sample.txt | 39 ++++++ 3 files changed, 190 insertions(+) create mode 100644 src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts create mode 100644 src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md create mode 100644 src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts new file mode 100644 index 0000000..df37a37 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts @@ -0,0 +1,130 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacMergerExtractionRepo } from "../../../storage/spac/SpacMergerExtractionRepo"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { Form_DEFM14A } from "./Form_DEFM14A"; +import { processMergerProxy } from "./Form_DEFM14A.storage"; + +const FIXTURE = `${import.meta.dir}/mock_data/merger-proxy/defm14a_sample.txt`; + +// The stub model returns a fixed merger deal; source_span must appear verbatim +// in the fixture's "The Business Combination" section text (verifyRow gate). +function scriptMergerDeal(): () => void { + const { unregister } = registerFakeStructuredProvider([ + { + target_name: "Acme Target Inc.", + pipe_amount: 150000000, + merger_consideration: "$10.00 per share in stock", + confidence: 0.95, + source_span: "business combination with Acme Target Inc.", + }, + ]); + return unregister; +} + +describe("processMergerProxy (e2e)", () => { + let repo: SpacRepo; + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRepo(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + async function seedSpacWithOpenDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2020-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Merge SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2021-03-05", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2021-03-01" }], + }); + } + + async function runProxy(cik: number, accession_number: string): Promise { + const txt = await Bun.file(FIXTURE).text(); + const parsed = await Form_DEFM14A.parse("DEFM14A", txt); + await processMergerProxy({ + cik, + file_number: "", + accession_number, + filing_date: "2021-05-01", + primary_doc: "defm14a.htm", + form: "DEFM14A", + formMergerProxy: parsed, + model: fakeS1Model(), + }); + } + + it("extracts the target/pipe, correlates onto the deal, and rolls up to proxy", async () => { + await seedSpacWithOpenDeal(100); + cleanup = scriptMergerDeal(); + await runProxy(100, "100-defm"); + + const extraction = await new SpacMergerExtractionRepo().getByAccession("100-defm"); + expect(extraction?.target_name).toBe("Acme Target Inc."); + expect(extraction?.pipe_amount).toBe(150000000); + expect(extraction?.merger_consideration).toBe("$10.00 per share in stock"); + + const deals = await repo.getDeals(100); + expect(deals).toHaveLength(1); + expect(deals[0].target_name).toBe("Acme Target Inc."); + expect(deals[0].pipe_amount).toBe(150000000); + + const events = await repo.getEvents(100); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + + const row = await repo.getSpac(100); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + expect(row?.pipe_amount).toBe(150000000); + expect(row?.proxy_date).toBe("2021-05-01"); + }); + + it("writes nothing for a CIK with no spac row (gate)", async () => { + cleanup = scriptMergerDeal(); + await runProxy(200, "200-defm"); + expect(await repo.getSpac(200)).toBeUndefined(); + expect(await new SpacMergerExtractionRepo().getByAccession("200-defm")).toBeUndefined(); + expect(await repo.getEvents(200)).toEqual([]); + }); + + it("is idempotent when the same proxy is reprocessed", async () => { + await seedSpacWithOpenDeal(300); + cleanup = scriptMergerDeal(); + await runProxy(300, "300-defm"); + await runProxy(300, "300-defm"); + + const events = await repo.getEvents(300); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + expect(await repo.getDeals(300)).toHaveLength(1); + }); +}); diff --git a/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md new file mode 100644 index 0000000..6b1c242 --- /dev/null +++ b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/SOURCES.md @@ -0,0 +1,21 @@ +# Merger-proxy (DEFM14A/PREM14A) fixtures + +## `defm14a_sample.txt` + +A **compact, hand-authored** DEFM14A full-submission fixture modeled on the real +EDGAR SGML structure (`` + a ``/`DEFM14A`/`` +envelope wrapping the primary HTML). It contains realistic `The Business +Combination` and `PIPE Financing` section headings with body prose naming a +target and a PIPE amount. + +It exists to exercise the **plumbing** of the merger-proxy path end to end — +`parseRegistrationSubmission` → `parseEdgarHtml` → `DocumentTreeSegmenter` → +section runner → `spac_merger_extraction` persistence → deal correlation → SPAC +rollup — under a **stubbed** structured-generation model (so the assertions do +not depend on a live LLM). It is **not** a verbatim EDGAR document and is not a +golden parser sample. + +Refreshing this tree with trimmed **real** SPAC DEFM14A/PREM14A submissions (for a +golden parser/segmenter test against authentic prospectus HTML) is future work; a +real full submission is multiple megabytes, so any committed real sample must be +trimmed to the merger/PIPE sections while keeping valid SGML + HTML. diff --git a/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt new file mode 100644 index 0000000..c84b14c --- /dev/null +++ b/src/sec/forms/proxies-information-statements/mock_data/merger-proxy/defm14a_sample.txt @@ -0,0 +1,39 @@ +0001234567-21-000001.hdr.sgml : 20210501 +ACCESSION NUMBER: 0001234567-21-000001 +CONFORMED SUBMISSION TYPE: DEFM14A +PUBLIC DOCUMENT COUNT: 1 +CONFORMED PERIOD OF REPORT: 20210501 +FILED AS OF DATE: 20210501 +FILER: + COMPANY DATA: + COMPANY CONFORMED NAME: MERGE SPAC INC. + CENTRAL INDEX KEY: 0001234567 + STANDARD INDUSTRIAL CLASSIFICATION: BLANK CHECKS [6770] + STATE OF INCORPORATION: DE + + +DEFM14A +1 +defm14a.htm + + +Merge SPAC Inc. — Definitive Merger Proxy + +

The Business Combination

+

+Merge SPAC Inc., a blank check company, has entered into a business combination +with Acme Target Inc., a leading operating company in its sector. Upon the closing +of the business combination with Acme Target Inc., the post-combination company +will continue the business of Acme Target Inc. +

+

PIPE Financing

+

+In connection with the business combination, certain institutional investors have +agreed to purchase, in a private placement, shares for an aggregate PIPE investment +of $150,000,000, to be funded immediately prior to the closing of the business +combination. +

+ + +
+
From bf80496fe645938fdfc5033430a7b9faccb21ffa Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 15:58:26 +0000 Subject: [PATCH 17/39] docs(spac): document merger-proxy extractor in CLAUDE.md --- CLAUDE.md | 20 +++++++++++++++++++ .../versioning/componentRegistry.test.ts | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 747d299..1a8b2a2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -186,6 +186,26 @@ redemption amounts stay null until the narrative/AI extractors (S-4 / DEFM14A / 425) land — 8-K item codes carry no names or amounts. Still deferred: name/SIC/ ticker transitions and Form 25/15 de-registration. +**Merger proxies** (`DEFM14A`, `PREM14A`; extractor id `merger-proxy`) run +`processMergerProxy` (known SPACs only — a `spac` row must already exist): AI +extraction over the merger / business-combination / PIPE sections records a +per-accession `spac_merger_extraction` row (target name/CIK, PIPE amount, merger +consideration) and observes the target company (`relation: "merger-proxy:target"`, +`target_cik` resolved from the canonical company when it has one). `deriveDeals` +correlates each extraction onto the matching `spac_deal` by filing-date window — +*deriving* `target_name` / `target_cik` / `pipe_amount` (definitive DEFM14A +supersedes preliminary PREM14A), which retires the 8-K path's positional +merge-preserve. DEFM14A also emits a `proxy` event (→ `proxy_date` / +`status = proxy`); PREM14A emits none. Redemption actuals stay null (post-vote +8-K, deferred) and S-4 is deferred (newco-CIK linkage). Configure the model via +`SEC_MERGER_PROXY_MODEL` (default `claude-sonnet-4-6`). + +```bash +sec fetch form DEFM14A # fetch + extract a merger proxy +sec extractor dead-letters merger-proxy # version-fixable extraction failures +sec extractor retry-dead-letters merger-proxy +``` + ```bash sec spac report [--format json] # consolidated report sec spac history [--format json] # state-change history diff --git a/src/storage/versioning/componentRegistry.test.ts b/src/storage/versioning/componentRegistry.test.ts index 4116a8a..a6e7818 100644 --- a/src/storage/versioning/componentRegistry.test.ts +++ b/src/storage/versioning/componentRegistry.test.ts @@ -41,7 +41,7 @@ describe("componentRegistry", () => { }); it("listRegisteredComponents returns one entry per extractor and resolver", () => { - // 13 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). - expect(listRegisteredComponents()).toHaveLength(17); + // 14 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). + expect(listRegisteredComponents()).toHaveLength(18); }); }); From 2f4dd59246383c875fa7aeeb0f2b970ba67295e6 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:28:36 +0000 Subject: [PATCH 18/39] feat(spac): make section-runner confidence floor per-call tunable Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../s1/sectionRunner.test.ts | 95 +++++++++++++++++++ .../s1/sectionRunner.ts | 21 +++- 2 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 src/sec/forms/registration-statements/s1/sectionRunner.test.ts diff --git a/src/sec/forms/registration-statements/s1/sectionRunner.test.ts b/src/sec/forms/registration-statements/s1/sectionRunner.test.ts new file mode 100644 index 0000000..bf01496 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/sectionRunner.test.ts @@ -0,0 +1,95 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from "bun:test"; +import { makeRunSection, parseConfidenceFloor } from "./sectionRunner"; +import type { ExtractionDeadLetterRepo } from "../../../../storage/dead-letter/ExtractionDeadLetterRepo"; + +interface RecordedLetter { + section_name: string; + reason_code: string; +} + +/** Minimal stub: runSection only calls `record` and `markResolved`. */ +function stubDeadLetters(): { repo: ExtractionDeadLetterRepo; letters: RecordedLetter[]; resolved: string[] } { + const letters: RecordedLetter[] = []; + const resolved: string[] = []; + const repo = { + record: async (args: { section_name: string; reason_code: string }) => { + letters.push({ section_name: args.section_name, reason_code: args.reason_code }); + }, + markResolved: async (_id: string, _acc: string, section: string) => { + resolved.push(section); + }, + } as unknown as ExtractionDeadLetterRepo; + return { repo, letters, resolved }; +} + +describe("parseConfidenceFloor", () => { + it("returns the fallback for undefined, empty, or non-numeric input", () => { + expect(parseConfidenceFloor(undefined, 0.3)).toBe(0.3); + expect(parseConfidenceFloor("", 0.3)).toBe(0.3); + expect(parseConfidenceFloor(" ", 0.3)).toBe(0.3); + expect(parseConfidenceFloor("abc", 0.3)).toBe(0.3); + }); + it("parses a numeric floor", () => { + expect(parseConfidenceFloor("0.8", 0)).toBe(0.8); + expect(parseConfidenceFloor("0", 0.5)).toBe(0); + }); +}); + +describe("makeRunSection confidenceFloor", () => { + const baseRow = { confidence: 0.5, value: 1 }; + + it("dead-letters LOW_CONFIDENCE_ALL when rows fall below an explicit floor", async () => { + const { repo, letters } = stubDeadLetters(); + let persisted = 0; + const runSection = makeRunSection({ + deadLetters: repo, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + accession_number: "acc-1", + confidenceFloor: 0.8, + }); + await runSection({ + sectionName: "merger", + text: "some text", + emptyDetail: "empty", + lowConfidenceDetail: "all rows below confidence floor", + extract: async () => [baseRow], + persist: async () => { + persisted++; + return 1; + }, + }); + expect(persisted).toBe(0); + expect(letters).toEqual([{ section_name: "merger", reason_code: "LOW_CONFIDENCE_ALL" }]); + }); + + it("persists the same rows under the default floor (0)", async () => { + const { repo, resolved } = stubDeadLetters(); + let persisted = 0; + const runSection = makeRunSection({ + deadLetters: repo, + extractor_id: "merger-proxy", + extractor_version: "1.0.0", + accession_number: "acc-2", + }); + await runSection({ + sectionName: "merger", + text: "some text", + emptyDetail: "empty", + lowConfidenceDetail: "all rows below confidence floor", + extract: async () => [baseRow], + persist: async () => { + persisted++; + return 1; + }, + }); + expect(persisted).toBe(1); + expect(resolved).toEqual(["merger"]); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/sectionRunner.ts b/src/sec/forms/registration-statements/s1/sectionRunner.ts index e6c3b95..b4daa34 100644 --- a/src/sec/forms/registration-statements/s1/sectionRunner.ts +++ b/src/sec/forms/registration-statements/s1/sectionRunner.ts @@ -6,10 +6,19 @@ import type { ExtractionDeadLetterRepo } from "../../../../storage/dead-letter/ExtractionDeadLetterRepo"; -const RAW_CONFIDENCE_FLOOR = Number(process.env.SEC_S1_CONFIDENCE_FLOOR ?? "0"); -// A non-numeric SEC_S1_CONFIDENCE_FLOOR would be NaN, and `confidence >= NaN` is -// always false — silently dropping every row. Fall back to 0 (no floor). -export const CONFIDENCE_FLOOR = Number.isFinite(RAW_CONFIDENCE_FLOOR) ? RAW_CONFIDENCE_FLOOR : 0; +/** + * Parse a confidence-floor env value. Undefined, empty, or non-numeric input + * (which `Number` would turn into `0` or `NaN`, silently dropping every row) + * falls back to `fallback`. + */ +export function parseConfidenceFloor(raw: string | undefined, fallback: number): number { + if (raw === undefined || raw.trim() === "") return fallback; + const n = Number(raw); + return Number.isFinite(n) ? n : fallback; +} + +/** Shared default floor (S-1 / 424); merger-proxy overrides via makeRunSection. */ +export const CONFIDENCE_FLOOR = parseConfidenceFloor(process.env.SEC_S1_CONFIDENCE_FLOOR, 0); export interface RunSectionArgs { readonly sectionName: string; @@ -59,8 +68,10 @@ export function makeRunSection(opts: { readonly extractor_id: string; readonly extractor_version: string; readonly accession_number: string; + readonly confidenceFloor?: number; }): RunSection { const { deadLetters, extractor_id, extractor_version, accession_number } = opts; + const floor = opts.confidenceFloor ?? CONFIDENCE_FLOOR; return async function runSection( sargs: RunSectionArgs @@ -85,7 +96,7 @@ export function makeRunSection(opts: { try { const raw = await sargs.extract(sargs.text); - const confident = raw.filter((r) => r.confidence >= CONFIDENCE_FLOOR); + const confident = raw.filter((r) => r.confidence >= floor); const text = sargs.text; const verifyRow = sargs.verifyRow; let rows: TRow[]; From 8ce10f7b9e997c8f91b35d6c4708e3653b91100b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:31:48 +0000 Subject: [PATCH 19/39] feat(spac): SEC_MERGER_PROXY_CONFIDENCE_FLOOR for merger extraction --- .../Form_DEFM14A.storage.ts | 7 +++- .../s1/mergerModel.test.ts | 36 +++++++++++++++++++ .../registration-statements/s1/mergerModel.ts | 10 ++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 src/sec/forms/registration-statements/s1/mergerModel.test.ts diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts index bc6ec43..00fc779 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts @@ -22,7 +22,11 @@ import { makeRunSection } from "../registration-statements/s1/sectionRunner"; import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan"; import { extractMergerDeal } from "../registration-statements/s1/sectionExtractors"; import type { MergerDealRow } from "../registration-statements/s1/mergerDealSchema"; -import { getMergerProxyModel, resolveModelId } from "../registration-statements/s1/mergerModel"; +import { + getMergerProxyModel, + getMergerProxyConfidenceFloor, + resolveModelId, +} from "../registration-statements/s1/mergerModel"; import type { FormS1Parsed } from "../registration-statements/Form_S_1"; const EXTRACTOR_ID = "merger-proxy"; @@ -109,6 +113,7 @@ export async function processMergerProxy(args: ProcessMergerProxyArgs): Promise< extractor_id: EXTRACTOR_ID, extractor_version, accession_number, + confidenceFloor: getMergerProxyConfidenceFloor(), }); let idx = 0; diff --git a/src/sec/forms/registration-statements/s1/mergerModel.test.ts b/src/sec/forms/registration-statements/s1/mergerModel.test.ts new file mode 100644 index 0000000..324c51a --- /dev/null +++ b/src/sec/forms/registration-statements/s1/mergerModel.test.ts @@ -0,0 +1,36 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { getMergerProxyConfidenceFloor } from "./mergerModel"; +import { CONFIDENCE_FLOOR } from "./sectionRunner"; + +const ENV = "SEC_MERGER_PROXY_CONFIDENCE_FLOOR"; +// Snapshot immediately before each test (not at module load) so suite-level +// setup or other files touching this env var cannot corrupt the baseline. +let original: string | undefined; +beforeEach(() => { + original = process.env[ENV]; +}); +afterEach(() => { + if (original === undefined) delete process.env[ENV]; + else process.env[ENV] = original; +}); + +describe("getMergerProxyConfidenceFloor", () => { + it("uses the per-extractor env when set", () => { + process.env[ENV] = "0.7"; + expect(getMergerProxyConfidenceFloor()).toBe(0.7); + }); + it("falls back to the shared global floor when unset", () => { + delete process.env[ENV]; + expect(getMergerProxyConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); + it("falls back on a non-numeric value", () => { + process.env[ENV] = "high"; + expect(getMergerProxyConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/mergerModel.ts b/src/sec/forms/registration-statements/s1/mergerModel.ts index 9d04ec0..089d477 100644 --- a/src/sec/forms/registration-statements/s1/mergerModel.ts +++ b/src/sec/forms/registration-statements/s1/mergerModel.ts @@ -7,6 +7,7 @@ import type { ModelConfig } from "workglow"; import { getGlobalModelRepository } from "workglow"; import { resolveModelId } from "./s1Model"; +import { CONFIDENCE_FLOOR, parseConfidenceFloor } from "./sectionRunner"; export { resolveModelId }; @@ -29,3 +30,12 @@ export async function getMergerProxyModel(): Promise { } return record as ModelConfig; } + +/** + * Confidence floor for merger-proxy extraction. `SEC_MERGER_PROXY_CONFIDENCE_FLOOR` + * overrides; when unset it falls back to the shared `CONFIDENCE_FLOOR` + * (`SEC_S1_CONFIDENCE_FLOOR`), so behavior is unchanged unless explicitly set. + */ +export function getMergerProxyConfidenceFloor(): number { + return parseConfidenceFloor(process.env.SEC_MERGER_PROXY_CONFIDENCE_FLOOR, CONFIDENCE_FLOOR); +} From 50370393653f30c009b0ac25b946795b29ee0208 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:35:50 +0000 Subject: [PATCH 20/39] feat(spac): cover 14C consent + revised proxies in merger-proxy extractor --- .../forms/proxies-information-statements/Form_DEFM14C.ts | 6 ++++++ .../forms/proxies-information-statements/Form_DEFR14A.ts | 6 ++++++ .../forms/proxies-information-statements/Form_PREM14C.ts | 6 ++++++ .../forms/proxies-information-statements/Form_PRER14A.ts | 6 ++++++ src/storage/versioning/extractorIds.test.ts | 2 +- src/storage/versioning/extractorIds.ts | 4 ++++ src/task/forms/ProcessAccessionDocFormTask.ts | 4 ++++ 7 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts index 37b1275..610a92f 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14C.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFM14C extends Form { static readonly name = "Definitive Information Statement for Merger or Acquisition"; static readonly description = "A definitive information statement relating to a merger or an acquisition."; static readonly forms = ["DEFM14C"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts b/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts index 067e33e..44b5923 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFR14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_DEFR14A extends Form { static readonly name = "Definitive Revised Proxy Soliciting Materials"; static readonly description = "Definitive revised proxy soliciting materials filed pursuant to Section 14(a) of the Securities Exchange Act of 1934"; static readonly forms = ["DEFR14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PREM14C.ts b/src/sec/forms/proxies-information-statements/Form_PREM14C.ts index 744325d..6112cbc 100644 --- a/src/sec/forms/proxies-information-statements/Form_PREM14C.ts +++ b/src/sec/forms/proxies-information-statements/Form_PREM14C.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PREM14C extends Form { static readonly name = "Preliminary Information Statement for Merger or Acquisition"; static readonly description = "A preliminary information statement relating to a merger or acquisition."; static readonly forms = ["PREM14C"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/sec/forms/proxies-information-statements/Form_PRER14A.ts b/src/sec/forms/proxies-information-statements/Form_PRER14A.ts index 87e9155..f302acd 100644 --- a/src/sec/forms/proxies-information-statements/Form_PRER14A.ts +++ b/src/sec/forms/proxies-information-statements/Form_PRER14A.ts @@ -5,10 +5,16 @@ */ import { Form } from "../Form"; +import { parseRegistrationSubmission } from "../registration-statements/s1/parseSubmission"; +import type { FormS1Parsed } from "../registration-statements/Form_S_1"; export class Form_PRER14A extends Form { static readonly name = "Preliminary Revised Proxy Statement"; static readonly description = "Preliminary revised proxy statement filed pursuant to Section 14(a) of the Securities Exchange Act of 1934"; static readonly forms = ["PRER14A"] as const; + + static override async parse(form: string, txt: string): Promise { + return parseRegistrationSubmission(form, txt); + } } diff --git a/src/storage/versioning/extractorIds.test.ts b/src/storage/versioning/extractorIds.test.ts index 3c410d7..800e5dc 100644 --- a/src/storage/versioning/extractorIds.test.ts +++ b/src/storage/versioning/extractorIds.test.ts @@ -30,7 +30,7 @@ describe("extractorIds", () => { }); it("maps the merger proxies to extractor id 'merger-proxy'", () => { - for (const form of ["DEFM14A", "PREM14A"]) { + for (const form of ["DEFM14A", "PREM14A", "DEFM14C", "PREM14C", "DEFR14A", "PRER14A"]) { expect(formToExtractorId(form)).toBe("merger-proxy"); } }); diff --git a/src/storage/versioning/extractorIds.ts b/src/storage/versioning/extractorIds.ts index 6f605e0..6c387d7 100644 --- a/src/storage/versioning/extractorIds.ts +++ b/src/storage/versioning/extractorIds.ts @@ -80,6 +80,10 @@ export const FORM_TO_EXTRACTOR_ID: Readonly> = { "8-K/A": "8-K", DEFM14A: "merger-proxy", PREM14A: "merger-proxy", + DEFM14C: "merger-proxy", + PREM14C: "merger-proxy", + DEFR14A: "merger-proxy", + PRER14A: "merger-proxy", }; export function formToExtractorId(form: string): ExtractorId | undefined { diff --git a/src/task/forms/ProcessAccessionDocFormTask.ts b/src/task/forms/ProcessAccessionDocFormTask.ts index 335595a..61cdd24 100644 --- a/src/task/forms/ProcessAccessionDocFormTask.ts +++ b/src/task/forms/ProcessAccessionDocFormTask.ts @@ -360,6 +360,10 @@ export class ProcessAccessionDocFormTask extends Task< break; case "DEFM14A": case "PREM14A": + case "DEFM14C": + case "PREM14C": + case "DEFR14A": + case "PRER14A": await processMergerProxy({ ...storageArgs, form: form!, formMergerProxy: parsed }); break; default: From a765cffddc857da25614485ffff013d27597cedf Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:39:55 +0000 Subject: [PATCH 21/39] feat(spac): definitive consent statements (DEFM14C) emit the proxy milestone Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../Form_DEFM14A.storage.e2e.test.ts | 69 +++++++++++++++++-- .../Form_DEFM14A.storage.ts | 19 ++--- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts index df37a37..aca3f11 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts @@ -69,16 +69,21 @@ describe("processMergerProxy (e2e)", () => { }); } - async function runProxy(cik: number, accession_number: string): Promise { + async function runProxy( + cik: number, + accession_number: string, + form = "DEFM14A", + filing_date = "2021-05-01" + ): Promise { const txt = await Bun.file(FIXTURE).text(); - const parsed = await Form_DEFM14A.parse("DEFM14A", txt); + const parsed = await Form_DEFM14A.parse(form, txt); await processMergerProxy({ cik, file_number: "", accession_number, - filing_date: "2021-05-01", - primary_doc: "defm14a.htm", - form: "DEFM14A", + filing_date, + primary_doc: "proxy.htm", + form, formMergerProxy: parsed, model: fakeS1Model(), }); @@ -127,4 +132,58 @@ describe("processMergerProxy (e2e)", () => { expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); expect(await repo.getDeals(300)).toHaveLength(1); }); + + it("emits a proxy event for a definitive consent statement (DEFM14C)", async () => { + await seedSpacWithOpenDeal(110); + cleanup = scriptMergerDeal(); + await runProxy(110, "110-defm14c", "DEFM14C"); + + const events = await repo.getEvents(110); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); + const row = await repo.getSpac(110); + expect(row?.status).toBe("proxy"); + expect(row?.target_name).toBe("Acme Target Inc."); + }); + + it("does not emit a proxy event for a preliminary consent statement (PREM14C)", async () => { + await seedSpacWithOpenDeal(111); + cleanup = scriptMergerDeal(); + await runProxy(111, "111-prem14c", "PREM14C"); + + const events = await repo.getEvents(111); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(111); + expect(row?.status).toBe("deal_announced"); + expect(row?.target_name).toBe("Acme Target Inc."); // still correlated + }); + + it("a revised proxy (DEFR14A) supersedes target/pipe without a second proxy event", async () => { + await seedSpacWithOpenDeal(112); + const dealWithPipe = (pipe_amount: number) => [ + { + target_name: "Acme Target Inc.", + pipe_amount, + merger_consideration: "$10.00 per share in stock", + confidence: 0.95, + source_span: "business combination with Acme Target Inc.", + }, + ]; + + // Definitive proxy first: emits the proxy event + initial PIPE. + let registration = registerFakeStructuredProvider(dealWithPipe(150000000)); + cleanup = registration.unregister; // guard against a throw inside runProxy + await runProxy(112, "112-defm", "DEFM14A", "2021-05-01"); + registration.unregister(); + cleanup = undefined; + + // Revised definitive proxy, filed later -> its extraction wins correlation. + registration = registerFakeStructuredProvider(dealWithPipe(225000000)); + cleanup = registration.unregister; + await runProxy(112, "112-defr", "DEFR14A", "2021-05-10"); + + const events = await repo.getEvents(112); + expect(events.filter((e) => e.event_type === "proxy")).toHaveLength(1); // only DEFM14A + const deals = await repo.getDeals(112); + expect(deals[0].pipe_amount).toBe(225000000); // revised value wins (later filing_date) + }); }); diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts index 00fc779..58e36fd 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.ts @@ -32,8 +32,8 @@ import type { FormS1Parsed } from "../registration-statements/Form_S_1"; const EXTRACTOR_ID = "merger-proxy"; const DEFAULT_EXTRACTOR_VERSION = "1.0.0"; const MERGER_SECTION = "merger"; -/** Definitive proxies emit a `proxy` lifecycle event; preliminary ones do not. */ -const DEFINITIVE_PROXY_FORMS = new Set(["DEFM14A"]); +/** Definitive merger statements emit a `proxy` lifecycle event; others do not. */ +const DEFINITIVE_PROXY_FORMS = new Set(["DEFM14A", "DEFM14C"]); export interface ProcessMergerProxyArgs { readonly cik: number; @@ -47,12 +47,15 @@ export interface ProcessMergerProxyArgs { } /** - * Extract the deal identity + PIPE from a SPAC merger proxy (DEFM14A/PREM14A). - * Gated on a known SPAC. Persists a `spac_merger_extraction` row, observes the - * target company, then records the proxy event and recomputes deals (correlation - * derives target/pipe onto the matching `spac_deal`). Degrades gracefully: when - * the merger section is absent or low-confidence, it dead-letters and still emits - * the proxy event so `proxy_date` advances. + * Extract the deal identity + PIPE from a SPAC merger proxy — the 14A/14C merger + * and revised-proxy family (`DEFM14A`/`PREM14A`, `DEFM14C`/`PREM14C`, + * `DEFR14A`/`PRER14A`); see {@link DEFINITIVE_PROXY_FORMS} for which emit the + * proxy event. Gated on a known SPAC. Persists a `spac_merger_extraction` row, + * observes the target company, then records the proxy event and recomputes deals + * (correlation derives target/pipe onto the matching `spac_deal`). Degrades + * gracefully: when the merger section is absent or low-confidence, it dead-letters + * and still emits the proxy event (for definitive merger statements) so + * `proxy_date` advances. */ export async function processMergerProxy(args: ProcessMergerProxyArgs): Promise { const { cik, accession_number, form, filing_date, formMergerProxy } = args; From 7ee52a3f887433904abbe3f173266631bbfd72a0 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:44:36 +0000 Subject: [PATCH 22/39] docs(spac): document merger-proxy coverage + confidence floor --- CLAUDE.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1a8b2a2..ebbf384 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -186,19 +186,25 @@ redemption amounts stay null until the narrative/AI extractors (S-4 / DEFM14A / 425) land — 8-K item codes carry no names or amounts. Still deferred: name/SIC/ ticker transitions and Form 25/15 de-registration. -**Merger proxies** (`DEFM14A`, `PREM14A`; extractor id `merger-proxy`) run +**Merger proxies** (`DEFM14A`/`PREM14A`, the `DEFM14C`/`PREM14C` consent statements, +and the `DEFR14A`/`PRER14A` revised proxies; extractor id `merger-proxy`) run `processMergerProxy` (known SPACs only — a `spac` row must already exist): AI extraction over the merger / business-combination / PIPE sections records a per-accession `spac_merger_extraction` row (target name/CIK, PIPE amount, merger consideration) and observes the target company (`relation: "merger-proxy:target"`, `target_cik` resolved from the canonical company when it has one). `deriveDeals` correlates each extraction onto the matching `spac_deal` by filing-date window — -*deriving* `target_name` / `target_cik` / `pipe_amount` (definitive DEFM14A -supersedes preliminary PREM14A), which retires the 8-K path's positional -merge-preserve. DEFM14A also emits a `proxy` event (→ `proxy_date` / -`status = proxy`); PREM14A emits none. Redemption actuals stay null (post-vote -8-K, deferred) and S-4 is deferred (newco-CIK linkage). Configure the model via -`SEC_MERGER_PROXY_MODEL` (default `claude-sonnet-4-6`). +*deriving* `target_name` / `target_cik` / `pipe_amount` (a later filing supersedes +an earlier one — definitive over preliminary, revised over definitive), which +retires the 8-K path's positional merge-preserve. Only the **definitive merger** +statements `DEFM14A` and `DEFM14C` emit the `proxy` event (→ `proxy_date` / +`status = proxy`): a consent deal (14C) has no `8-K 5.07` vote, so the definitive +14C is its only approval-stage signal. Preliminary (`PREM14A`/`PREM14C`) and revised +(`DEFR14A`/`PRER14A`) proxies are extraction-only. Redemption actuals stay null +(post-vote 8-K, deferred) and S-4 is deferred (newco-CIK linkage). Configure the +model via `SEC_MERGER_PROXY_MODEL` (default `claude-sonnet-4-6`) and an optional +confidence floor via `SEC_MERGER_PROXY_CONFIDENCE_FLOOR` (falls back to the shared +`SEC_S1_CONFIDENCE_FLOOR` when unset). ```bash sec fetch form DEFM14A # fetch + extract a merger proxy From e05e88573cff63fb84259c239194a8a84d769408 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 19:47:09 +0000 Subject: [PATCH 23/39] test(spac): cover PRER14A (preliminary revised) extraction-only path --- .../Form_DEFM14A.storage.e2e.test.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts index aca3f11..307a42c 100644 --- a/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts +++ b/src/sec/forms/proxies-information-statements/Form_DEFM14A.storage.e2e.test.ts @@ -157,6 +157,18 @@ describe("processMergerProxy (e2e)", () => { expect(row?.target_name).toBe("Acme Target Inc."); // still correlated }); + it("does not emit a proxy event for a preliminary revised proxy (PRER14A)", async () => { + await seedSpacWithOpenDeal(113); + cleanup = scriptMergerDeal(); + await runProxy(113, "113-prer14a", "PRER14A"); + + const events = await repo.getEvents(113); + expect(events.some((e) => e.event_type === "proxy")).toBe(false); + const row = await repo.getSpac(113); + expect(row?.status).toBe("deal_announced"); + expect(row?.target_name).toBe("Acme Target Inc."); // extraction-only, still correlated + }); + it("a revised proxy (DEFR14A) supersedes target/pipe without a second proxy event", async () => { await seedSpacWithOpenDeal(112); const dealWithPipe = (pipe_amount: number) => [ From ce95755d64c8940dd79d9f11094412e72888f690 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 03:53:36 +0000 Subject: [PATCH 24/39] feat(sec): slice 8-K primary + EX-99 exhibits from submission Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../s1/parseEightKSubmission.test.ts | 41 +++++++++++++++++++ .../s1/parseSubmission.ts | 37 ++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts diff --git a/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts new file mode 100644 index 0000000..0ab9751 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, expect, it } from "vitest"; +import { parseEightKSubmission } from "./parseSubmission"; + +const wrap = (docs: string): string => + `\nACCESSION NUMBER: 0000000000-26-000001\n\n${docs}`; +const doc = (type: string, seq: number, body: string): string => + `\n${type}\n${seq}\n\n${body}\n\n\n`; + +describe("parseEightKSubmission", () => { + it("selects the primary 8-K body and collects EX-99.x exhibits", () => { + const txt = wrap( + doc("8-K", 1, "

Primary body

") + + doc("EX-99.1", 2, "

Press release

") + + doc("EX-99.2", 3, "

Second exhibit

") + + doc("EX-101.INS", 4, "ignored") + ); + const out = parseEightKSubmission("8-K", txt); + expect(out.primaryHtml).toContain("Primary body"); + expect(out.exhibitsHtml).toHaveLength(2); + expect(out.exhibitsHtml[0]).toContain("Press release"); + expect(out.exhibitsHtml[1]).toContain("Second exhibit"); + }); + + it("falls back to 1 then first doc when no TYPE matches the form", () => { + const txt = wrap(doc("8-K12B", 1, "

Seq one

") + doc("EX-99.1", 2, "

PR

")); + const out = parseEightKSubmission("8-K", txt); + expect(out.primaryHtml).toContain("Seq one"); + expect(out.exhibitsHtml).toHaveLength(1); + }); + + it("returns a bare body and no exhibits when there is no DOCUMENT envelope", () => { + const out = parseEightKSubmission("8-K", "

just a body

"); + expect(out.primaryHtml).toContain("just a body"); + expect(out.exhibitsHtml).toEqual([]); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/parseSubmission.ts b/src/sec/forms/registration-statements/s1/parseSubmission.ts index 7d06435..0868d7f 100644 --- a/src/sec/forms/registration-statements/s1/parseSubmission.ts +++ b/src/sec/forms/registration-statements/s1/parseSubmission.ts @@ -33,6 +33,12 @@ function headerSlice(txt: string): string { return firstDoc !== -1 ? txt.slice(0, firstDoc) : txt; } +/** Body after the SGML `
` boundary, else the whole input. */ +function bodyAfterHeader(txt: string): string { + const end = txt.indexOf(""); + return end !== -1 ? txt.slice(end + "".length) : txt; +} + /** * Parses the human-readable EDGAR submission header (and the older tagged * `` form as a fallback). Tolerant: any missing field is null. @@ -124,8 +130,7 @@ export function parseRegistrationSubmission(form: string, txt: string): FormS1Pa // No envelope: treat the input as a bare body. If a SEC-HEADER is // present (a malformed/truncated submission missing its document blocks), drop // it so the header lines aren't fed to the HTML converter as body text. - const end = txt.indexOf(""); - const html = end !== -1 ? txt.slice(end + "".length) : txt; + const html = bodyAfterHeader(txt); return { header, html, xbrlInstanceXml: null, feeExhibitHtml: null }; } const byType = docs.find((d) => d.type !== null && d.type.toUpperCase() === form.toUpperCase()); @@ -138,3 +143,31 @@ export function parseRegistrationSubmission(form: string, txt: string): FormS1Pa feeExhibitHtml: findFeeExhibit(docs), }; } + +/** Primary document body + EX-99.x exhibit bodies sliced from an 8-K submission. */ +export interface EightKSubmissionDocs { + readonly primaryHtml: string; + readonly exhibitsHtml: readonly string[]; +} + +/** + * Slices a full-submission `.txt` (or bare primary-doc body) into the primary + * document and its `EX-99.x` exhibits. The primary is the block whose `` + * equals `form`, else ` 1`, else the first; exhibits are every block + * whose `` starts with `EX-99`. With no `` envelope the whole + * input is the primary body and there are no exhibits. + */ +export function parseEightKSubmission(form: string, txt: string): EightKSubmissionDocs { + const docs = parseDocuments(txt); + if (docs.length === 0) { + const html = bodyAfterHeader(txt); + return { primaryHtml: html, exhibitsHtml: [] }; + } + const byType = docs.find((d) => d.type !== null && d.type.toUpperCase() === form.toUpperCase()); + const bySeq = docs.find((d) => d.sequence === 1); + const primary = byType ?? bySeq ?? docs[0]; + const exhibitsHtml = docs + .filter((d) => d.type !== null && d.type.toUpperCase().startsWith("EX-99")) + .map((d) => d.body); + return { primaryHtml: primary.body, exhibitsHtml }; +} From c6406d6ac5f4ad2c9e174efac785f2bc135d6ad3 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 03:59:22 +0000 Subject: [PATCH 25/39] feat(sec): add redemption schema and extractRedemption Adds RedemptionOutputSchema (shares, amount, price_per_share, confidence, source_span) and extractRedemption() in sectionExtractors.ts, mirroring the extractMergerDeal pattern. Covered by a two-case bun test. Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../s1/extractRedemption.test.ts | 53 +++++++++++++++++++ .../s1/redemptionSchema.ts | 25 +++++++++ .../s1/sectionExtractors.ts | 22 ++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/sec/forms/registration-statements/s1/extractRedemption.test.ts create mode 100644 src/sec/forms/registration-statements/s1/redemptionSchema.ts diff --git a/src/sec/forms/registration-statements/s1/extractRedemption.test.ts b/src/sec/forms/registration-statements/s1/extractRedemption.test.ts new file mode 100644 index 0000000..524d7fa --- /dev/null +++ b/src/sec/forms/registration-statements/s1/extractRedemption.test.ts @@ -0,0 +1,53 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, describe, expect, it } from "bun:test"; +import { extractRedemption } from "./sectionExtractors"; +import { fakeS1Model, registerFakeStructuredProvider } from "./testing/fakeStructuredProvider"; + +let cleanup: (() => void) | undefined; +afterEach(() => { + cleanup?.(); + cleanup = undefined; +}); + +describe("extractRedemption", () => { + it("returns the parsed redemption row", async () => { + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: 1234567, + redemption_amount: 12400000, + price_per_share: 10.05, + confidence: 0.9, + source_span: "1,234,567 shares elected to redeem for $12,400,000", + }, + ]); + cleanup = unregister; + const text = "Holders of 1,234,567 shares elected to redeem for $12,400,000."; + const row = await extractRedemption(text, fakeS1Model()); + expect(row).not.toBeNull(); + expect(row?.redemption_shares).toBe(1234567); + expect(row?.redemption_amount).toBe(12400000); + }); + + it("returns null when the model cites no source span", async () => { + // The null source_span is what drops the row — even a confident response is + // discarded without a verbatim span to anchor it (any confidence floor lives + // in the section runner, not here). + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0.95, + source_span: null, + }, + ]); + cleanup = unregister; + const row = await extractRedemption("no redemption here", fakeS1Model()); + expect(row).toBeNull(); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/redemptionSchema.ts b/src/sec/forms/registration-statements/s1/redemptionSchema.ts new file mode 100644 index 0000000..d19682e --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionSchema.ts @@ -0,0 +1,25 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import { TypeNullable } from "../../../../util/TypeBoxUtil"; + +/** The single realized-redemption object the model returns from an 8-K. */ +export const RedemptionOutputSchema = Type.Object({ + redemption_shares: TypeNullable( + Type.Integer({ minimum: 0, description: "Shares redeemed (public shares tendered)" }) + ), + redemption_amount: TypeNullable( + Type.Number({ description: "Total dollars paid to redeeming holders" }) + ), + price_per_share: TypeNullable( + Type.Number({ description: "Per-share redemption / trust value, when stated" }) + ), + confidence: Type.Number({ minimum: 0, maximum: 1 }), + source_span: TypeNullable(Type.String()), +}); + +export type RedemptionRow = Static; diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.ts index c974870..9e6f950 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.ts @@ -19,6 +19,7 @@ import { OfferingTermsOutputSchema, type OfferingTermsRow } from "./offeringTerm import { UnderwriterOutputSchema, type UnderwriterRowOut } from "./underwriterSchema"; import { UseOfProceedsOutputSchema, type UseOfProceedsLineRow } from "./useOfProceedsSchema"; import { MergerDealOutputSchema, type MergerDealRow } from "./mergerDealSchema"; +import { RedemptionOutputSchema, type RedemptionRow } from "./redemptionSchema"; const MAX_TOKENS = 4096; @@ -243,3 +244,24 @@ export async function extractUseOfProceeds( const obj = await runStructured(model, prompt, UseOfProceedsOutputSchema); return (obj.line_items as UseOfProceedsLineRow[] | undefined) ?? []; } + +/** + * Extracts realized redemptions (shares, dollars, per-share value) from an 8-K + * narrative (vote-results / closing press release). Returns null when the model + * is not confident or cites no source span. Mirrors {@link extractMergerDeal}. + */ +export async function extractRedemption( + sectionText: string, + model: ModelConfig +): Promise { + const instructions = + "From the SEC 8-K text below, extract the REALIZED redemption of public " + + "shares (e.g. reported after a shareholder vote or upon closing). Report " + + "only figures explicitly stated — do NOT multiply shares by price to " + + "synthesize an amount. If the text does not report realized redemptions, " + + "return confidence 0 and null fields."; + const prompt = `${UNTRUSTED_PREAMBLE}\n\n${instructions}\n\n${wrapUntrusted(sectionText)}`; + const obj = await runStructured(model, prompt, RedemptionOutputSchema); + if (obj.confidence == null || obj.source_span == null) return null; + return obj as unknown as RedemptionRow; +} From 0811acb01f229eaa1b852562035ab93960ef6647 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:05:27 +0000 Subject: [PATCH 26/39] feat(sec): add redemption model + confidence-floor config Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../s1/redemptionModel.test.ts | 51 +++++++++++++++++++ .../s1/redemptionModel.ts | 41 +++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 src/sec/forms/registration-statements/s1/redemptionModel.test.ts create mode 100644 src/sec/forms/registration-statements/s1/redemptionModel.ts diff --git a/src/sec/forms/registration-statements/s1/redemptionModel.test.ts b/src/sec/forms/registration-statements/s1/redemptionModel.test.ts new file mode 100644 index 0000000..599e1be --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionModel.test.ts @@ -0,0 +1,51 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { getRedemptionConfidenceFloor, getRedemptionModelId } from "./redemptionModel"; +import { CONFIDENCE_FLOOR } from "./sectionRunner"; + +const FLOOR_ENV = "SEC_REDEMPTION_CONFIDENCE_FLOOR"; +const MODEL_ENV = "SEC_REDEMPTION_MODEL"; + +let originalFloor: string | undefined; +let originalModel: string | undefined; +beforeEach(() => { + originalFloor = process.env[FLOOR_ENV]; + originalModel = process.env[MODEL_ENV]; +}); +afterEach(() => { + if (originalFloor === undefined) delete process.env[FLOOR_ENV]; + else process.env[FLOOR_ENV] = originalFloor; + if (originalModel === undefined) delete process.env[MODEL_ENV]; + else process.env[MODEL_ENV] = originalModel; +}); + +describe("getRedemptionModelId", () => { + it("defaults to claude-sonnet-4-6 when unset", () => { + delete process.env[MODEL_ENV]; + expect(getRedemptionModelId()).toBe("claude-sonnet-4-6"); + }); + it("honors SEC_REDEMPTION_MODEL when set", () => { + process.env[MODEL_ENV] = "claude-opus-4-8"; + expect(getRedemptionModelId()).toBe("claude-opus-4-8"); + }); +}); + +describe("getRedemptionConfidenceFloor", () => { + it("uses the per-extractor env when set", () => { + process.env[FLOOR_ENV] = "0.7"; + expect(getRedemptionConfidenceFloor()).toBe(0.7); + }); + it("falls back to the shared global floor when unset", () => { + delete process.env[FLOOR_ENV]; + expect(getRedemptionConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); + it("falls back on a non-numeric value", () => { + process.env[FLOOR_ENV] = "high"; + expect(getRedemptionConfidenceFloor()).toBe(CONFIDENCE_FLOOR); + }); +}); diff --git a/src/sec/forms/registration-statements/s1/redemptionModel.ts b/src/sec/forms/registration-statements/s1/redemptionModel.ts new file mode 100644 index 0000000..5b32f37 --- /dev/null +++ b/src/sec/forms/registration-statements/s1/redemptionModel.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { ModelConfig } from "workglow"; +import { getGlobalModelRepository } from "workglow"; +import { resolveModelId } from "./s1Model"; +import { CONFIDENCE_FLOOR, parseConfidenceFloor } from "./sectionRunner"; + +export { resolveModelId }; + +const DEFAULT_REDEMPTION_MODEL = "claude-sonnet-4-6"; + +/** The model id used for redemption extraction; overridable via SEC_REDEMPTION_MODEL. */ +export function getRedemptionModelId(): string { + const id = (process.env.SEC_REDEMPTION_MODEL ?? "").trim(); + return id === "" ? DEFAULT_REDEMPTION_MODEL : id; +} + +/** Resolves the configured redemption model into a ModelConfig. */ +export async function getRedemptionModel(): Promise { + const id = getRedemptionModelId(); + const record = await getGlobalModelRepository().findByName(id); + if (!record) { + throw new Error( + `Redemption model '${id}' is not registered. Register it or set SEC_REDEMPTION_MODEL to a known model id.` + ); + } + return record as ModelConfig; +} + +/** + * Confidence floor for redemption extraction. `SEC_REDEMPTION_CONFIDENCE_FLOOR` + * overrides; when unset it falls back to the shared `CONFIDENCE_FLOOR` + * (`SEC_S1_CONFIDENCE_FLOOR`). + */ +export function getRedemptionConfidenceFloor(): number { + return parseConfidenceFloor(process.env.SEC_REDEMPTION_CONFIDENCE_FLOOR, CONFIDENCE_FLOOR); +} From c847d7a8b4dde7b7ddd4295842c3e9fd8eb4d03d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:08:54 +0000 Subject: [PATCH 27/39] feat(sec): add spac_redemption_extraction storage + DI --- src/config/DefaultDI.ts | 14 +++++ src/config/TestingDI.ts | 13 ++++ src/config/setupAllDatabases.ts | 2 + .../spac/SpacRedemptionExtractionRepo.test.ts | 61 +++++++++++++++++++ .../spac/SpacRedemptionExtractionRepo.ts | 34 +++++++++++ .../spac/SpacRedemptionExtractionSchema.ts | 41 +++++++++++++ 6 files changed, 165 insertions(+) create mode 100644 src/storage/spac/SpacRedemptionExtractionRepo.test.ts create mode 100644 src/storage/spac/SpacRedemptionExtractionRepo.ts create mode 100644 src/storage/spac/SpacRedemptionExtractionSchema.ts diff --git a/src/config/DefaultDI.ts b/src/config/DefaultDI.ts index 8c2ae7f..6a2a650 100644 --- a/src/config/DefaultDI.ts +++ b/src/config/DefaultDI.ts @@ -349,6 +349,11 @@ import { SpacMergerExtractionPrimaryKeyNames, SpacMergerExtractionSchema, } from "../storage/spac/SpacMergerExtractionSchema"; +import { + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtractionSchema, +} from "../storage/spac/SpacRedemptionExtractionSchema"; import { createStorage } from "./createStorage"; export const DefaultDI = () => { @@ -679,6 +684,15 @@ export const DefaultDI = () => { [["cik"]] ) ); + globalServiceRegistry.registerInstance( + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + createStorage( + "spac_redemption_extraction", + SpacRedemptionExtractionSchema, + SpacRedemptionExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // ----- Observation / Canonical / Resolver ----- globalServiceRegistry.registerInstance( diff --git a/src/config/TestingDI.ts b/src/config/TestingDI.ts index 95f4002..e246c4b 100644 --- a/src/config/TestingDI.ts +++ b/src/config/TestingDI.ts @@ -263,6 +263,11 @@ import { SpacMergerExtractionPrimaryKeyNames, SpacMergerExtractionSchema, } from "../storage/spac/SpacMergerExtractionSchema"; +import { + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtractionSchema, +} from "../storage/spac/SpacRedemptionExtractionSchema"; import { CANONICAL_COMPANY_REPOSITORY_TOKEN, CanonicalCompanyPrimaryKeyNames, @@ -506,6 +511,14 @@ export function resetDependencyInjectionsForTesting() { [["cik"]] ) ); + globalServiceRegistry.registerInstance( + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + new InMemoryTabularStorage( + SpacRedemptionExtractionSchema, + SpacRedemptionExtractionPrimaryKeyNames, + [["cik"]] + ) + ); // Initialize Crowdfunding repositories globalServiceRegistry.registerInstance( diff --git a/src/config/setupAllDatabases.ts b/src/config/setupAllDatabases.ts index b98b631..64c6348 100644 --- a/src/config/setupAllDatabases.ts +++ b/src/config/setupAllDatabases.ts @@ -55,6 +55,7 @@ import { SPAC_DEAL_REPOSITORY_TOKEN } from "../storage/spac/SpacDealSchema"; import { SPAC_EVENT_REPOSITORY_TOKEN } from "../storage/spac/SpacEventSchema"; import { SPAC_HISTORY_REPOSITORY_TOKEN } from "../storage/spac/SpacHistorySchema"; import { SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN } from "../storage/spac/SpacMergerExtractionSchema"; +import { SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN } from "../storage/spac/SpacRedemptionExtractionSchema"; import { CANONICAL_COMPANY_ALIAS_REPOSITORY_TOKEN, CANONICAL_PERSON_ALIAS_REPOSITORY_TOKEN, @@ -147,6 +148,7 @@ export async function setupAllDatabases(): Promise { await globalServiceRegistry.get(SPAC_EVENT_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_HISTORY_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(SPAC_MERGER_EXTRACTION_REPOSITORY_TOKEN).setupDatabase(); + await globalServiceRegistry.get(SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(CIK_LAST_UPDATE_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_FACTS_REPOSITORY_TOKEN).setupDatabase(); await globalServiceRegistry.get(PROCESSED_SUBMISSIONS_REPOSITORY_TOKEN).setupDatabase(); diff --git a/src/storage/spac/SpacRedemptionExtractionRepo.test.ts b/src/storage/spac/SpacRedemptionExtractionRepo.test.ts new file mode 100644 index 0000000..e21f716 --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionRepo.test.ts @@ -0,0 +1,61 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacRedemptionExtractionRepo } from "./SpacRedemptionExtractionRepo"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; + +function row( + p: Partial & Pick +): SpacRedemptionExtraction { + return { + form: "8-K", + filing_date: "2021-05-01", + extractor_id: "redemption", + extractor_version: "1.0.0", + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0.9, + source_span: null, + model_id: null, + created_at: "2026-01-01T00:00:00.000Z", + ...p, + }; +} + +describe("SpacRedemptionExtractionRepo", () => { + let repo: SpacRedemptionExtractionRepo; + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + repo = new SpacRedemptionExtractionRepo(); + }); + + it("round-trips a row and overwrites by accession", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, redemption_amount: 100_000 })); + await repo.save(row({ accession_number: "a1", cik: 5, redemption_amount: 200_000 })); + expect((await repo.getByAccession("a1"))?.redemption_amount).toBe(200_000); + }); + + it("queries all extractions for a CIK", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, redemption_shares: 1000 })); + await repo.save(row({ accession_number: "a2", cik: 5, redemption_shares: 2000 })); + await repo.save(row({ accession_number: "b1", cik: 6, redemption_shares: 3000 })); + expect((await repo.getByCik(5)).length).toBe(2); + expect(await repo.getByCik(99)).toEqual([]); + }); + + it("re-save same accession stays length 1 with updated field", async () => { + await repo.save(row({ accession_number: "a1", cik: 5, price_per_share: 10.0 })); + await repo.save(row({ accession_number: "a1", cik: 5, price_per_share: 10.12 })); + const rows = await repo.getByCik(5); + expect(rows.length).toBe(1); + expect(rows[0].price_per_share).toBe(10.12); + }); +}); diff --git a/src/storage/spac/SpacRedemptionExtractionRepo.ts b/src/storage/spac/SpacRedemptionExtractionRepo.ts new file mode 100644 index 0000000..0a613b5 --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionRepo.ts @@ -0,0 +1,34 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { globalServiceRegistry } from "workglow"; +import { + SpacRedemptionExtraction, + SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN, + SpacRedemptionExtractionRepositoryStorage, +} from "./SpacRedemptionExtractionSchema"; + +/** Per-accession redemption-extraction rows. */ +export class SpacRedemptionExtractionRepo { + private readonly storage: SpacRedemptionExtractionRepositoryStorage; + + constructor(storage?: SpacRedemptionExtractionRepositoryStorage) { + this.storage = storage ?? globalServiceRegistry.get(SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN); + } + + async save(row: SpacRedemptionExtraction): Promise { + await this.storage.put(row); + } + + async getByAccession(accession_number: string): Promise { + return this.storage.get({ accession_number }); + } + + /** All extractions for a CIK (unordered). */ + async getByCik(cik: number): Promise { + return (await this.storage.query({ cik })) || []; + } +} diff --git a/src/storage/spac/SpacRedemptionExtractionSchema.ts b/src/storage/spac/SpacRedemptionExtractionSchema.ts new file mode 100644 index 0000000..54d3c98 --- /dev/null +++ b/src/storage/spac/SpacRedemptionExtractionSchema.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Static, Type } from "typebox"; +import type { ITabularStorage } from "workglow"; +import { createServiceToken } from "workglow"; +import { TypeNullable } from "../../util/TypeBoxUtil"; + +/** One row per redemption-extraction filing. Current-state: a re-extraction overwrites by accession. */ +export const SpacRedemptionExtractionSchema = Type.Object({ + accession_number: Type.String({ maxLength: 25 }), + cik: Type.Integer({ minimum: 0, description: "SPAC origin CIK (filer)" }), + form: Type.String({ maxLength: 20 }), + filing_date: Type.String({ format: "date" }), + extractor_id: Type.String({ maxLength: 32 }), + extractor_version: Type.String({ maxLength: 32 }), + redemption_shares: TypeNullable(Type.Integer({ minimum: 0 })), + redemption_amount: TypeNullable(Type.Number()), + price_per_share: TypeNullable(Type.Number()), + confidence: Type.Number(), + source_span: TypeNullable(Type.String({ maxLength: 2000 })), + model_id: TypeNullable(Type.String({ maxLength: 128 })), + created_at: Type.String({ format: "date-time" }), +}); + +export type SpacRedemptionExtraction = Static; + +export const SpacRedemptionExtractionPrimaryKeyNames = ["accession_number"] as const; +export type SpacRedemptionExtractionRepositoryStorage = ITabularStorage< + typeof SpacRedemptionExtractionSchema, + typeof SpacRedemptionExtractionPrimaryKeyNames, + SpacRedemptionExtraction +>; + +export const SPAC_REDEMPTION_EXTRACTION_REPOSITORY_TOKEN = + createServiceToken( + "sec.storage.spacRedemptionExtractionRepository" + ); From d767fac5b9a85d06498f27a696e7fb063273d6ad Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:11:27 +0000 Subject: [PATCH 28/39] feat(sec): register the redemption extractor id --- src/storage/versioning/componentRegistry.test.ts | 4 ++-- src/storage/versioning/extractorIds.test.ts | 1 + src/storage/versioning/extractorIds.ts | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/storage/versioning/componentRegistry.test.ts b/src/storage/versioning/componentRegistry.test.ts index a6e7818..a37b7c1 100644 --- a/src/storage/versioning/componentRegistry.test.ts +++ b/src/storage/versioning/componentRegistry.test.ts @@ -41,7 +41,7 @@ describe("componentRegistry", () => { }); it("listRegisteredComponents returns one entry per extractor and resolver", () => { - // 14 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). - expect(listRegisteredComponents()).toHaveLength(18); + // 15 extractors + 4 resolvers (person, company, sponsor-family, underwriter-family). + expect(listRegisteredComponents()).toHaveLength(19); }); }); diff --git a/src/storage/versioning/extractorIds.test.ts b/src/storage/versioning/extractorIds.test.ts index 800e5dc..cbbcf02 100644 --- a/src/storage/versioning/extractorIds.test.ts +++ b/src/storage/versioning/extractorIds.test.ts @@ -26,6 +26,7 @@ describe("extractorIds", () => { "D", "S-1", "merger-proxy", + "redemption", ]); }); diff --git a/src/storage/versioning/extractorIds.ts b/src/storage/versioning/extractorIds.ts index 6c387d7..a20f2c6 100644 --- a/src/storage/versioning/extractorIds.ts +++ b/src/storage/versioning/extractorIds.ts @@ -19,6 +19,7 @@ export const EXTRACTOR_IDS = [ "424", "8-K", "merger-proxy", + "redemption", ] as const; export type ExtractorId = (typeof EXTRACTOR_IDS)[number]; From 50e55e3922ed15b7a43c366e80f4b977ba1e651b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:15:41 +0000 Subject: [PATCH 29/39] feat(sec): correlate redemption extractions onto spac deals Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/storage/spac/SpacReportWriter.ts | 2 +- .../spac/spacDealGrouping.redemption.test.ts | 86 +++++++++++++++++++ src/storage/spac/spacDealGrouping.test.ts | 16 +++- src/storage/spac/spacDealGrouping.ts | 45 ++++++++-- 4 files changed, 139 insertions(+), 10 deletions(-) create mode 100644 src/storage/spac/spacDealGrouping.redemption.test.ts diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index 5003d49..a1cab83 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -165,7 +165,7 @@ export class SpacReportWriter { this.mergerExtractions.getByCik(cik), this.repo.getDeals(cik), ]); - const deals = deriveDeals(cik, events, extractions, existingDeals); + const deals = deriveDeals(cik, events, extractions, [], existingDeals); for (const deal of deals) await this.repo.saveDeal(deal); } diff --git a/src/storage/spac/spacDealGrouping.redemption.test.ts b/src/storage/spac/spacDealGrouping.redemption.test.ts new file mode 100644 index 0000000..c5c4749 --- /dev/null +++ b/src/storage/spac/spacDealGrouping.redemption.test.ts @@ -0,0 +1,86 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { describe, expect, it } from "bun:test"; +import { deriveDeals } from "./spacDealGrouping"; +import type { SpacEvent } from "./SpacEventSchema"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; + +const ev = (event_type: string, event_date: string, acc: string): SpacEvent => + ({ + cik: 1, + accession_number: acc, + event_type, + event_date, + form: "8-K", + primary_document: null, + source_document_url: null, + deal_index: null, + amount: null, + shares: null, + detail: null, + confidence: null, + created_at: "2026-01-01T00:00:00.000Z", + }) as unknown as SpacEvent; + +const red = ( + acc: string, + filing_date: string, + shares: number | null, + amount: number | null +): SpacRedemptionExtraction => ({ + accession_number: acc, + cik: 1, + form: "8-K", + filing_date, + extractor_id: "redemption", + extractor_version: "1.0.0", + redemption_shares: shares, + redemption_amount: amount, + price_per_share: null, + confidence: 0.9, + source_span: "x", + model_id: "fake", + created_at: "2026-01-01T00:00:00.000Z", +}); + +describe("deriveDeals redemption correlation", () => { + it("attaches a redemption filed at/after the deal's completion date", () => { + const events = [ + ev("definitive_agreement", "2026-01-10", "da-1"), + ev("completed", "2026-03-20", "close-1"), + ]; + const deals = deriveDeals(1, events, [], [red("r-1", "2026-03-20", 500000, 5_100_000)], []); + expect(deals).toHaveLength(1); + expect(deals[0].redemption_amount).toBe(5_100_000); + expect(deals[0].redemption_shares).toBe(500000); + }); + + it("buckets redemptions by announcement window across two deals", () => { + const events = [ + ev("definitive_agreement", "2026-01-10", "da-1"), + ev("terminated", "2026-02-15", "term-1"), + ev("definitive_agreement", "2026-04-01", "da-2"), + ev("completed", "2026-06-01", "close-2"), + ]; + const reds = [red("r-1", "2026-02-10", 100, 1000), red("r-2", "2026-06-01", 200, 2000)]; + const deals = deriveDeals(1, events, [], reds, []); + expect(deals[0].redemption_amount).toBe(1000); + expect(deals[1].redemption_amount).toBe(2000); + }); + + it("latest redemption filing wins; non-null survives a later null", () => { + const events = [ev("definitive_agreement", "2026-01-10", "da-1")]; + const reds = [red("r-1", "2026-03-01", 100, 1000), red("r-2", "2026-03-05", 150, null)]; + const deals = deriveDeals(1, events, [], reds, []); + expect(deals[0].redemption_shares).toBe(150); + expect(deals[0].redemption_amount).toBe(1000); + }); + + it("leaves redemptions unattached when there is no deal", () => { + const deals = deriveDeals(1, [], [], [red("r-1", "2026-03-01", 100, 1000)], []); + expect(deals).toEqual([]); + }); +}); diff --git a/src/storage/spac/spacDealGrouping.test.ts b/src/storage/spac/spacDealGrouping.test.ts index dbaeace..dcc4ba1 100644 --- a/src/storage/spac/spacDealGrouping.test.ts +++ b/src/storage/spac/spacDealGrouping.test.ts @@ -86,6 +86,7 @@ describe("deriveDeals", () => { ev("completed", "2021-06-15"), ], [], + [], [] ); expect(deals.length).toBe(1); @@ -107,6 +108,7 @@ describe("deriveDeals", () => { ev("completed", "2021-09-01"), ], [], + [], [] ); expect(deals.map((d) => d.deal_index)).toEqual([0, 1]); @@ -118,12 +120,12 @@ describe("deriveDeals", () => { }); it("ignores an extension vote with no open deal", () => { - const deals = deriveDeals(1, [ev("vote", "2021-04-01")], [], []); + const deals = deriveDeals(1, [ev("vote", "2021-04-01")], [], [], []); expect(deals.length).toBe(0); }); it("opens an already-completed deal when 2.01 has no preceding DA", () => { - const deals = deriveDeals(1, [ev("completed", "2021-09-01")], [], []); + const deals = deriveDeals(1, [ev("completed", "2021-09-01")], [], [], []); expect(deals.length).toBe(1); expect(deals[0].outcome).toBe("completed"); expect(deals[0].announced_date).toBeNull(); @@ -140,6 +142,7 @@ describe("deriveDeals", () => { ev("completed", "2021-09-01"), ], [], + [], [] ); const shuffled = deriveDeals( @@ -151,6 +154,7 @@ describe("deriveDeals", () => { ev("terminated", "2021-02-01"), ], [], + [], [] ); // created_at is a wall-clock stamp for new rows; compare the derived fields. @@ -160,7 +164,7 @@ describe("deriveDeals", () => { it("preserves created_at from an existing deal row", () => { const existing = [deal({ deal_index: 0, outcome: "pending", created_at: "2020-01-01T00:00:00.000Z" })]; - const deals = deriveDeals(1, [ev("definitive_agreement", "2021-03-01")], [], existing); + const deals = deriveDeals(1, [ev("definitive_agreement", "2021-03-01")], [], [], existing); expect(deals[0].created_at).toBe("2020-01-01T00:00:00.000Z"); }); @@ -169,6 +173,7 @@ describe("deriveDeals", () => { 1, [ev("definitive_agreement", "2021-03-01"), ev("completed", "2021-06-15")], [ext("p1", "2021-05-01", { target_name: "Acme Target Inc.", pipe_amount: 150_000_000 })], + [], [] ); expect(deals.length).toBe(1); @@ -184,6 +189,7 @@ describe("deriveDeals", () => { ext("prem", "2021-04-01", { form: "PREM14A", target_name: "Acme Target Inc.", pipe_amount: null }), ext("defm", "2021-05-10", { form: "DEFM14A", target_name: "Acme Target, Inc.", pipe_amount: 200_000_000 }), ], + [], [] ); expect(deals[0].target_name).toBe("Acme Target, Inc."); // definitive wins @@ -192,7 +198,7 @@ describe("deriveDeals", () => { it("leaves an extraction with no matching open deal unattached", () => { // proxy filed before any DA event -> no deal yet - const deals = deriveDeals(1, [], [ext("p1", "2021-05-01", { target_name: "Acme" })], []); + const deals = deriveDeals(1, [], [ext("p1", "2021-05-01", { target_name: "Acme" })], [], []); expect(deals.length).toBe(0); }); @@ -209,6 +215,7 @@ describe("deriveDeals", () => { ext("p0", "2021-01-20", { target_name: "First Target" }), ext("p1", "2021-06-01", { target_name: "Second Target" }), ], + [], [] ); expect(deals.map((d) => d.target_name)).toEqual(["First Target", "Second Target"]); @@ -219,6 +226,7 @@ describe("deriveDeals", () => { 1, [ev("definitive_agreement", "2021-03-01"), ev("proxy", "2021-05-20")], [], + [], [] ); expect(deals[0].proxy_date).toBe("2021-05-20"); diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts index 495e08f..deb32e8 100644 --- a/src/storage/spac/spacDealGrouping.ts +++ b/src/storage/spac/spacDealGrouping.ts @@ -7,6 +7,7 @@ import type { SpacDeal, SpacDealOutcome } from "./SpacDealSchema"; import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; import type { SpacMergerExtraction } from "./SpacMergerExtractionSchema"; +import type { SpacRedemptionExtraction } from "./SpacRedemptionExtractionSchema"; /** Event types that shape a business-combination attempt. */ const DEAL_RELEVANT_EVENT_TYPES: readonly SpacEventType[] = [ @@ -30,6 +31,9 @@ interface DealSkeleton { target_name: string | null; target_cik: number | null; pipe_amount: number | null; + // Columns derived by correlating redemption extractions (below). + redemption_amount: number | null; + redemption_shares: number | null; } /** @@ -46,13 +50,18 @@ interface DealSkeleton { * §4b-owned columns (`target_name`, `target_cik`, `pipe_amount`) are **derived** * by correlating each {@link SpacMergerExtraction} to the deal whose * `[announced, closed)` window contains the proxy's `filing_date` (definitive - * supersedes preliminary; latest non-null wins). Redemption actuals are deferred - * (post-vote 8-K) and stay null. `created_at` is preserved from any existing row. + * supersedes preliminary; latest non-null wins). Redemption columns + * (`redemption_amount`, `redemption_shares`) are derived from + * {@link SpacRedemptionExtraction} rows by announcement window (upper bound is the + * next deal's announcement, not the current deal's outcome_date, so a redemption + * reported at or after closing still attaches to that deal). `created_at` is + * preserved from any existing row. */ export function deriveDeals( cik: number, events: readonly SpacEvent[], mergerExtractions: readonly SpacMergerExtraction[], + redemptionExtractions: readonly SpacRedemptionExtraction[], existingDeals: readonly SpacDeal[] ): SpacDeal[] { const relevant = events @@ -80,6 +89,8 @@ export function deriveDeals( target_name: null, target_cik: null, pipe_amount: null, + redemption_amount: null, + redemption_shares: null, }; skeletons.push(d); return d; @@ -159,6 +170,30 @@ export function deriveDeals( } } + // --- Correlate redemption extractions onto deals by announcement window --- + // A deal owns [lower, nextLower): lower = its announced/DA/outcome date, upper + // = the next deal's same lower bound. Unlike the merger window this ignores + // outcome_date for the upper bound, so a redemption reported at/after closing + // still attaches to the deal being closed. + const dealLower = (d: DealSkeleton): string | null => + d.announced_date ?? d.definitive_agreement_date ?? d.outcome_date ?? null; + for (let i = 0; i < skeletons.length; i++) { + const d = skeletons[i]; + const lower = dealLower(d); + const upper = skeletons[i + 1] ? dealLower(skeletons[i + 1]) : null; + const matched = redemptionExtractions + .filter( + (r) => + (lower == null || r.filing_date >= lower) && (upper == null || r.filing_date < upper) + ) + .sort((a, b) => a.filing_date.localeCompare(b.filing_date)); + // Latest non-null wins per field; earlier non-nulls survive when a later filing omits them. + for (const r of matched) { + if (r.redemption_amount != null) d.redemption_amount = r.redemption_amount; + if (r.redemption_shares != null) d.redemption_shares = r.redemption_shares; + } + } + const existingByIndex = new Map(existingDeals.map((d) => [d.deal_index, d])); return skeletons.map((s) => ({ cik, @@ -169,9 +204,9 @@ export function deriveDeals( pipe_amount: s.pipe_amount, // proxy_date: derived from the proxy event in the walk. proxy_date: s.proxy_date, - // redemption actuals: deferred (post-vote 8-K) — no source yet. - redemption_amount: null, - redemption_shares: null, + // Columns derived from correlated redemption extractions. + redemption_amount: s.redemption_amount, + redemption_shares: s.redemption_shares, // 8-K-owned columns: announced_date: s.announced_date, definitive_agreement_date: s.definitive_agreement_date, From a46527fde4a6c0821155de9e18b63d34435a5337 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:20:47 +0000 Subject: [PATCH 30/39] feat(sec): read redemption extractions in deal recompute Thread SpacRedemptionExtractionRepo through recomputeAndSaveDeals, replacing the [] stopgap with actual redemption data from the repo. --- src/storage/spac/SpacReportWriter.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index a1cab83..bd18e8c 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -9,6 +9,7 @@ import { SpacRepo } from "./SpacRepo"; import { buildSpacRow, type SpacRowPatch } from "./spacRollup"; import { deriveDeals } from "./spacDealGrouping"; import { SpacMergerExtractionRepo } from "./SpacMergerExtractionRepo"; +import { SpacRedemptionExtractionRepo } from "./SpacRedemptionExtractionRepo"; import type { Spac } from "./SpacSchema"; import type { SpacEvent, SpacEventType } from "./SpacEventSchema"; import type { SpacHistory } from "./SpacHistorySchema"; @@ -72,6 +73,7 @@ const TRACKED_FIELDS: readonly (keyof Spac)[] = [ export class SpacReportWriter { private readonly repo: SpacRepo; private readonly mergerExtractions = new SpacMergerExtractionRepo(); + private readonly redemptionExtractions = new SpacRedemptionExtractionRepo(); constructor(repo: SpacRepo = new SpacRepo()) { this.repo = repo; @@ -160,12 +162,13 @@ export class SpacReportWriter { * (the single derivation path shared by the 8-K and merger-proxy writers). */ private async recomputeAndSaveDeals(cik: number): Promise { - const [events, extractions, existingDeals] = await Promise.all([ + const [events, extractions, redemptions, existingDeals] = await Promise.all([ this.repo.getEvents(cik), this.mergerExtractions.getByCik(cik), + this.redemptionExtractions.getByCik(cik), this.repo.getDeals(cik), ]); - const deals = deriveDeals(cik, events, extractions, [], existingDeals); + const deals = deriveDeals(cik, events, extractions, redemptions, existingDeals); for (const deal of deals) await this.repo.saveDeal(deal); } From 65cabd586e978405a040d79139d328c3a1c1baa5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:26:32 +0000 Subject: [PATCH 31/39] feat(sec): extract redemptions from 8-K narrative and roll up Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../miscellaneous-filings/Form_8_K.storage.ts | 18 ++ .../redemption8k.test.ts | 187 ++++++++++++++++++ .../miscellaneous-filings/redemption8k.ts | 130 ++++++++++++ .../spac8kRedemptionTriggers.ts | 15 ++ src/storage/spac/SpacReportWriter.ts | 18 ++ 5 files changed, 368 insertions(+) create mode 100644 src/sec/forms/miscellaneous-filings/redemption8k.test.ts create mode 100644 src/sec/forms/miscellaneous-filings/redemption8k.ts create mode 100644 src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts diff --git a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts index ec2c719..d0f7035 100644 --- a/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts +++ b/src/sec/forms/miscellaneous-filings/Form_8_K.storage.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type { ModelConfig } from "workglow"; import { Form8KEventRepo } from "../../../storage/form-8k-event/Form8KEventRepo"; import type { Form8KEvent } from "../../../storage/form-8k-event/Form8KEventSchema"; import type { Form8K } from "./Form_8_K.schema"; @@ -11,6 +12,7 @@ import { Form_8_K_ITEMS } from "./Form_8_K"; import { SpacRepo } from "../../../storage/spac/SpacRepo"; import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; import { mapItemCodesToSpacEvents } from "./spac8kMilestones"; +import { processRedemption8K } from "./redemption8k"; /** * Extracts item codes from the filing metadata `items` field. @@ -51,6 +53,8 @@ export async function processForm8K({ items, report_date, form8K, + fullSubmissionText, + model, }: { readonly cik: number; readonly accession_number: string; @@ -59,6 +63,8 @@ export async function processForm8K({ readonly items: string | undefined | null; readonly report_date: string | undefined | null; readonly form8K: Form8K; + readonly fullSubmissionText?: string; + readonly model?: ModelConfig; }): Promise { const eventRepo = new Form8KEventRepo(); const isAmendment = form === "8-K/A"; @@ -100,4 +106,16 @@ export async function processForm8K({ }); } } + + if (spacRow && fullSubmissionText) { + await processRedemption8K({ + cik, + accession_number, + filing_date, + form, + itemCodes, + fullSubmissionText, + model, + }); + } } diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.test.ts b/src/sec/forms/miscellaneous-filings/redemption8k.test.ts new file mode 100644 index 0000000..e6f28a4 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.test.ts @@ -0,0 +1,187 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacRedemptionExtractionRepo } from "../../../storage/spac/SpacRedemptionExtractionRepo"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { hasRedemptionTriggerItem } from "./spac8kRedemptionTriggers"; +import { processRedemption8K } from "./redemption8k"; + +const FULL_TXT = + "\nACCESSION NUMBER: 0000000000-26-000009\n\n" + + "\n8-K\n1\n\n

Vote results.

\n
\n
\n" + + "\nEX-99.1\n2\n\n" + + "

Holders of 1,234,567 shares elected to redeem for $12,400,000.

\n" + + "
\n
\n"; + +describe("processRedemption8K", () => { + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + it("hasRedemptionTriggerItem matches trigger codes only", () => { + expect(hasRedemptionTriggerItem("5.07,9.01")).toBe(true); + expect(hasRedemptionTriggerItem("2.02")).toBe(false); + expect(hasRedemptionTriggerItem(null)).toBe(false); + }); + + async function seedSpacWithOpenDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Redeem SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2026-01-10" }], + }); + } + + it("extracts a redemption and derives it onto the open deal", async () => { + await seedSpacWithOpenDeal(42); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1234567, + redemption_amount: 12400000, + price_per_share: 10.05, + confidence: 0.95, + source_span: "1,234,567 shares elected to redeem for $12,400,000", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 42, + accession_number: "0000000000-26-000009", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const ext = await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000009"); + expect(ext?.redemption_amount).toBe(12400000); + expect(ext?.redemption_shares).toBe(1234567); + + const deals = await new SpacRepo().getDeals(42); + expect(deals[0].redemption_amount).toBe(12400000); + expect(deals[0].redemption_shares).toBe(1234567); + }); + + it("writes nothing without a trigger item", async () => { + await seedSpacWithOpenDeal(43); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 43, + accession_number: "0000000000-26-000010", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["9.01"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000010") + ).toBeUndefined(); + }); + + it("writes nothing for a CIK with no spac row (gate)", async () => { + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 99, + accession_number: "0000000000-26-000011", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000011") + ).toBeUndefined(); + }); + + it("does not extract when the SPAC has no deals", async () => { + await new SpacReportWriter().recordRegistration({ + cik: 44, + accession_number: "44-reg", + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Dealless SPAC Inc.", + spac_sic: 6770, + }); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 1, + redemption_amount: 1, + price_per_share: 10, + confidence: 0.95, + source_span: "elected to redeem", + }, + ]); + cleanup = registration.unregister; + + await processRedemption8K({ + cik: 44, + accession_number: "0000000000-26-000012", + filing_date: "2026-03-20", + form: "8-K", + itemCodes: ["5.07"], + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + expect( + await new SpacRedemptionExtractionRepo().getByAccession("0000000000-26-000012") + ).toBeUndefined(); + }); +}); diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.ts b/src/sec/forms/miscellaneous-filings/redemption8k.ts new file mode 100644 index 0000000..c49b202 --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.ts @@ -0,0 +1,130 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import type { ModelConfig } from "workglow"; +import { globalServiceRegistry, renderMarkdown } from "workglow"; +import { parseEdgarHtml } from "../../html/parseEdgarHtml"; +import { parseEightKSubmission } from "../registration-statements/s1/parseSubmission"; +import { makeRunSection } from "../registration-statements/s1/sectionRunner"; +import { spanAppearsIn } from "../registration-statements/s1/verifySourceSpan"; +import { extractRedemption } from "../registration-statements/s1/sectionExtractors"; +import type { RedemptionRow } from "../registration-statements/s1/redemptionSchema"; +import { + getRedemptionModel, + getRedemptionConfidenceFloor, + resolveModelId, +} from "../registration-statements/s1/redemptionModel"; +import { VersionRegistry } from "../../../storage/versioning/VersionRegistry"; +import { getActiveSlot } from "../../../storage/versioning/getActiveSlot"; +import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../../storage/versioning/ComponentVersionSchema"; +import { ExtractionDeadLetterRepo } from "../../../storage/dead-letter/ExtractionDeadLetterRepo"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { SpacRedemptionExtractionRepo } from "../../../storage/spac/SpacRedemptionExtractionRepo"; +import { REDEMPTION_TRIGGER_ITEMS } from "./spac8kRedemptionTriggers"; + +const EXTRACTOR_ID = "redemption"; +const DEFAULT_EXTRACTOR_VERSION = "1.0.0"; +const REDEMPTION_SECTION = "redemption"; + +export interface ProcessRedemption8KArgs { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + readonly itemCodes: readonly string[]; + readonly fullSubmissionText: string; + readonly model?: ModelConfig; +} + +/** Renders an EDGAR HTML body to plain markdown text (source-span verifiable). */ +function renderBody(html: string, title: string): string { + const doc = parseEdgarHtml(html, title); + return doc.children + .map((n) => renderMarkdown(n)) + .filter((s) => s.length > 0) + .join("\n\n") + .trim(); +} + +/** + * AI-extract realized redemptions from a known SPAC's vote-results / closing + * 8-K (primary document + EX-99.x exhibits). Gated on a trigger item and an + * existing deal to attach to. Persists a redemption-extraction row and + * recomputes deals so the redemption is correlated onto the matching deal. + */ +export async function processRedemption8K(args: ProcessRedemption8KArgs): Promise { + const { cik, accession_number, filing_date, form, itemCodes, fullSubmissionText } = args; + + if (!itemCodes.some((c) => REDEMPTION_TRIGGER_ITEMS.includes(c))) return; + + const spacRepo = new SpacRepo(); + const spac = await spacRepo.getSpac(cik); + if (!spac) return; + const deals = await spacRepo.getDeals(cik); + if (deals.length === 0) return; + + const versionRegistry = new VersionRegistry( + globalServiceRegistry.get(COMPONENT_VERSION_REPOSITORY_TOKEN) + ); + const extractorSlot = await getActiveSlot(versionRegistry, "extractor", EXTRACTOR_ID); + const extractor_version = extractorSlot?.semver ?? DEFAULT_EXTRACTOR_VERSION; + const deadLetters = new ExtractionDeadLetterRepo(); + const model = args.model ?? (await getRedemptionModel()); + const model_id = resolveModelId(model); + + const { primaryHtml, exhibitsHtml } = parseEightKSubmission(form, fullSubmissionText); + const text = [primaryHtml, ...exhibitsHtml] + .map((h, i) => renderBody(h, `${form} ${accession_number} #${i}`)) + .filter((t) => t.length > 0) + .join("\n\n"); + + const runSection = makeRunSection({ + deadLetters, + extractor_id: EXTRACTOR_ID, + extractor_version, + accession_number, + confidenceFloor: getRedemptionConfidenceFloor(), + }); + + let persisted = 0; + await runSection({ + sectionName: REDEMPTION_SECTION, + text: text === "" ? undefined : text, + notFoundDetail: "no primary/EX-99 narrative text", + emptyDetail: "no redemption returned", + lowConfidenceDetail: "below confidence floor", + verifyRow: (t, r) => spanAppearsIn(t, r.source_span), + unverifiedAllDetail: "redemption source_span not present in narrative text", + extract: async (t) => { + const row = await extractRedemption(t, model); + return row === null ? [] : [row]; + }, + persist: async (rows) => { + const row = rows[0]; + await new SpacRedemptionExtractionRepo().save({ + accession_number, + cik, + form, + filing_date, + extractor_id: EXTRACTOR_ID, + extractor_version, + redemption_shares: row.redemption_shares, + redemption_amount: row.redemption_amount, + price_per_share: row.price_per_share, + confidence: row.confidence, + source_span: row.source_span, + model_id, + created_at: new Date().toISOString(), + }); + persisted = 1; + return 1; + }, + }); + + if (persisted > 0) { + await new SpacReportWriter().recordRedemption({ cik, accession_number, filing_date, form }); + } +} diff --git a/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts b/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts new file mode 100644 index 0000000..0021a5e --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/spac8kRedemptionTriggers.ts @@ -0,0 +1,15 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +/** 8-K item codes that can carry realized SPAC redemptions. */ +export const REDEMPTION_TRIGGER_ITEMS: readonly string[] = ["5.07", "2.01", "8.01"]; + +/** True when a comma/semicolon-separated items string contains a trigger code. */ +export function hasRedemptionTriggerItem(items: string | null | undefined): boolean { + if (!items) return false; + const set = new Set(items.split(/[,;]/).map((s) => s.trim())); + return REDEMPTION_TRIGGER_ITEMS.some((code) => set.has(code)); +} diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index bd18e8c..b969f11 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -157,6 +157,24 @@ export class SpacReportWriter { await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); } + /** + * Record a realized redemption: recompute deals from the event stream + + * stored redemption extractions (correlation derives redemption_amount / + * redemption_shares onto the matching deal), then rebuild the row. No event + * is appended — redemptions never advance the lifecycle and an extra event + * would double-count in the rollup. The extraction itself is persisted by the + * caller (`processRedemption8K`) before this runs. + */ + async recordRedemption(args: { + readonly cik: number; + readonly accession_number: string; + readonly filing_date: string; + readonly form: string; + }): Promise { + await this.recomputeAndSaveDeals(args.cik); + await this.rebuild(args.cik, args.filing_date, `${args.form}:${args.accession_number}`, {}); + } + /** * Rebuild the deal set from the CIK's full event stream + merger extractions * (the single derivation path shared by the 8-K and merger-proxy writers). From 8801ca9cb6e1332a808d82630c0c4f7d8dc89fd2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:33:17 +0000 Subject: [PATCH 32/39] feat(sec): escalate known-SPAC trigger 8-Ks to full submission fetch Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- ...essAccessionDocFormTask.redemption.test.ts | 120 ++++++++++++++++++ src/task/forms/ProcessAccessionDocFormTask.ts | 26 +++- 2 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 src/task/forms/ProcessAccessionDocFormTask.redemption.test.ts diff --git a/src/task/forms/ProcessAccessionDocFormTask.redemption.test.ts b/src/task/forms/ProcessAccessionDocFormTask.redemption.test.ts new file mode 100644 index 0000000..48fea4f --- /dev/null +++ b/src/task/forms/ProcessAccessionDocFormTask.redemption.test.ts @@ -0,0 +1,120 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import type { IExecuteContext } from "workglow"; +import { globalServiceRegistry } from "workglow"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; +import { SpacReportWriter } from "../../storage/spac/SpacReportWriter"; +import { ProcessAccessionDocFormTask } from "./ProcessAccessionDocFormTask"; + +class CapturingTask extends ProcessAccessionDocFormTask { + public readonly fetched: string[] = []; + + protected override async runFetch( + _cik: number, + _accessionNumber: string, + fileName: string, + _context: IExecuteContext + ): Promise { + this.fetched.push(fileName); + return ""; + } +} + +async function seedSpac(cik: number): Promise { + await new SpacReportWriter().recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Redeem SPAC Inc.", + spac_sic: 6770, + }); +} + +async function seedFiling(opts: { + readonly cik: number; + readonly accession_number: string; + readonly form: string; + readonly primary_doc: string; + readonly items: string; +}): Promise { + const repo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN); + await repo.put({ + cik: opts.cik, + accession_number: opts.accession_number, + form: opts.form, + primary_doc: opts.primary_doc, + file_number: "", + filing_date: "2026-03-20", + acceptance_date: "2026-03-20T00:00:00.000Z", + report_date: "2026-03-19", + film_number: null, + primary_doc_description: null, + size: null, + is_xbrl: null, + is_inline_xbrl: null, + items: opts.items, + act: null, + } as never); +} + +describe("ProcessAccessionDocFormTask redemption fetch escalation", () => { + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + + it("fetches the full .txt for a known-SPAC trigger-item 8-K", async () => { + const accession = "0000000000-26-000007"; + await seedSpac(7); + await seedFiling({ + cik: 7, + accession_number: accession, + form: "8-K", + primary_doc: "primary.htm", + items: "5.07,9.01", + }); + const task = new CapturingTask(); + await task.run({ accessionNumber: accession }); + expect(task.fetched).toContain(`${accession}.txt`); + }); + + it("keeps the primary-doc fetch for a non-trigger item", async () => { + const accession = "0000000000-26-000008"; + await seedSpac(7); + await seedFiling({ + cik: 7, + accession_number: accession, + form: "8-K", + primary_doc: "primary.htm", + items: "2.02", + }); + const task = new CapturingTask(); + await task.run({ accessionNumber: accession }); + expect(task.fetched).toContain("primary.htm"); + expect(task.fetched).not.toContain(`${accession}.txt`); + }); + + it("keeps the primary-doc fetch for a non-SPAC CIK", async () => { + const accession = "0000000000-26-000010"; + await seedFiling({ + cik: 99, + accession_number: accession, + form: "8-K", + primary_doc: "primary.htm", + items: "5.07", + }); + const task = new CapturingTask(); + await task.run({ accessionNumber: accession }); + expect(task.fetched).toContain("primary.htm"); + expect(task.fetched).not.toContain(`${accession}.txt`); + }); +}); diff --git a/src/task/forms/ProcessAccessionDocFormTask.ts b/src/task/forms/ProcessAccessionDocFormTask.ts index 61cdd24..1d02908 100644 --- a/src/task/forms/ProcessAccessionDocFormTask.ts +++ b/src/task/forms/ProcessAccessionDocFormTask.ts @@ -26,7 +26,9 @@ import { processFormS1 } from "../../sec/forms/registration-statements/Form_S_1. import { processForm424 } from "../../sec/forms/registration-statements/Form_424.storage"; import { processForm8K } from "../../sec/forms/miscellaneous-filings/Form_8_K.storage"; import { processMergerProxy } from "../../sec/forms/proxies-information-statements/Form_DEFM14A.storage"; +import { hasRedemptionTriggerItem } from "../../sec/forms/miscellaneous-filings/spac8kRedemptionTriggers"; import { TypeSecCik } from "../../sec/submissions/EnititySubmissionSchema"; +import { SpacRepo } from "../../storage/spac/SpacRepo"; import { ExtractionDeadLetterRepo } from "../../storage/dead-letter/ExtractionDeadLetterRepo"; import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; import { COMPONENT_VERSION_REPOSITORY_TOKEN } from "../../storage/versioning/ComponentVersionSchema"; @@ -193,6 +195,21 @@ export class ProcessAccessionDocFormTask extends Task< fileName = fullSubmissionFileName(accessionNumber); } + // Known-SPAC 8-Ks carrying a redemption-trigger item are fetched as the full + // submission .txt so the redemption pass can read the EX-99 vote-results + // exhibit, not just the primary document. Other 8-Ks keep their primary-doc + // fetch. + let redemptionFullSubmission = false; + if ( + (form === "8-K" || form === "8-K/A") && + hasRedemptionTriggerItem(items) && + cik !== undefined && + (await new SpacRepo().getSpac(cik)) !== undefined + ) { + fileName = fullSubmissionFileName(accessionNumber); + redemptionFullSubmission = true; + } + const extractorId = formToExtractorId(form); if (!extractorId) { throw new TaskError(`No extractor registered for form '${form}'`); @@ -356,7 +373,14 @@ export class ProcessAccessionDocFormTask extends Task< break; case "8-K": case "8-K/A": - await processForm8K({ ...storageArgs, form: form!, items, report_date, form8K: parsed }); + await processForm8K({ + ...storageArgs, + form: form!, + items, + report_date, + form8K: parsed, + fullSubmissionText: redemptionFullSubmission ? text : undefined, + }); break; case "DEFM14A": case "PREM14A": From 03317924128a2045ac4cea43a3f373570ea86e5f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:37:47 +0000 Subject: [PATCH 33/39] test(sec): end-to-end redemption extraction + single rollup Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../redemption8k.e2e.test.ts | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts b/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts new file mode 100644 index 0000000..c23d73c --- /dev/null +++ b/src/sec/forms/miscellaneous-filings/redemption8k.e2e.test.ts @@ -0,0 +1,179 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../../config/TestingDI"; +import { setupAllDatabases } from "../../../config/setupAllDatabases"; +import { SpacRepo } from "../../../storage/spac/SpacRepo"; +import { SpacReportWriter } from "../../../storage/spac/SpacReportWriter"; +import { + fakeS1Model, + registerFakeStructuredProvider, +} from "../registration-statements/s1/testing/fakeStructuredProvider"; +import { processForm8K } from "./Form_8_K.storage"; + +const FULL_TXT = + "\nACCESSION NUMBER: 0000000000-26-000020\n\n" + + "\n8-K\n1\n\n

Vote results.

\n
\n
\n" + + "\nEX-99.1\n2\n\n" + + "

Holders of 800,000 shares elected to redeem for $8,200,000.

\n" + + "
\n
\n"; + +describe("processForm8K — redemption e2e", () => { + let cleanup: (() => void) | undefined; + + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + afterEach(() => { + cleanup?.(); + cleanup = undefined; + }); + + async function seedSpacWithDeal(cik: number): Promise { + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "E2E SPAC Inc.", + spac_sic: 6770, + }); + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [{ event_type: "definitive_agreement", event_date: "2026-01-10" }], + }); + } + + it("derives redemption onto deal and rolls up into spac report exactly once", async () => { + await seedSpacWithDeal(20); + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = registration.unregister; + + await processForm8K({ + cik: 20, + accession_number: "0000000000-26-000020", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const deals = await new SpacRepo().getDeals(20); + expect(deals[0].redemption_amount).toBe(8200000); + + const spacRow = await new SpacRepo().getSpac(20); + expect(spacRow?.total_redemption_amount).toBe(8200000); + }); + + it("is idempotent — reprocessing the same 8-K does not double the redemption amount", async () => { + await seedSpacWithDeal(20); + + const args = { + cik: 20, + accession_number: "0000000000-26-000020", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + } as const; + + // First call + const reg1 = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + await processForm8K(args); + reg1.unregister(); + + // Second call — same accession, same payload + const reg2 = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = reg2.unregister; + await processForm8K(args); + + const deals = await new SpacRepo().getDeals(20); + expect(deals[0].redemption_amount).toBe(8200000); + + const spacRow = await new SpacRepo().getSpac(20); + expect(spacRow?.total_redemption_amount).toBe(8200000); + }); + + it("known SPAC with no deal yields no redemption rollup", async () => { + // Seed SPAC row but no deal milestone + await new SpacReportWriter().recordRegistration({ + cik: 21, + accession_number: "21-reg", + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "No Deal SPAC Inc.", + spac_sic: 6770, + }); + + const registration = registerFakeStructuredProvider([ + { + redemption_shares: 800000, + redemption_amount: 8200000, + price_per_share: 10.25, + confidence: 0.95, + source_span: "800,000 shares elected to redeem for $8,200,000", + }, + ]); + cleanup = registration.unregister; + + await processForm8K({ + cik: 21, + accession_number: "0000000000-26-000021", + filing_date: "2026-03-20", + form: "8-K", + items: "5.07", + report_date: "2026-03-19", + form8K: {}, + fullSubmissionText: FULL_TXT, + model: fakeS1Model(), + }); + + const spacRow = await new SpacRepo().getSpac(21); + expect(spacRow).toBeDefined(); + expect(spacRow?.total_redemption_amount ?? null).toBeNull(); + + const deals = await new SpacRepo().getDeals(21); + expect(deals).toHaveLength(0); + }); +}); From 19b57e45047248a51ef762f77b5c5922e9f377c4 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:47:11 +0000 Subject: [PATCH 34/39] feat(sec): add backfill-redemptions command Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/commands/spac.ts | 13 +++ src/storage/spac/SpacRepo.ts | 5 ++ src/task/spac/BackfillRedemptionsTask.test.ts | 89 +++++++++++++++++++ src/task/spac/BackfillRedemptionsTask.ts | 85 ++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 src/task/spac/BackfillRedemptionsTask.test.ts create mode 100644 src/task/spac/BackfillRedemptionsTask.ts diff --git a/src/commands/spac.ts b/src/commands/spac.ts index fb8532f..b1fea2e 100644 --- a/src/commands/spac.ts +++ b/src/commands/spac.ts @@ -6,9 +6,11 @@ import { Command } from "commander"; import { globalServiceRegistry } from "workglow"; +import { withCli } from "@workglow/cli"; import { SpacRepo } from "../storage/spac/SpacRepo"; import { SPAC_SPONSOR_LINK_REPOSITORY_TOKEN } from "../storage/canonical/SpacSponsorLinkSchema"; import { UNDERWRITER_LINK_REPOSITORY_TOKEN } from "../storage/canonical/UnderwriterLinkSchema"; +import { BackfillRedemptionsTask } from "../task/spac/BackfillRedemptionsTask"; export interface SpacReport { readonly cik: number; @@ -110,4 +112,15 @@ export function registerSpacCommands(program: Command): void { ); } }); + + spacCmd + .command("backfill-redemptions") + .description("Re-process known-SPAC trigger-item 8-Ks to extract realized redemptions") + .action(async () => { + const out = (await withCli(new BackfillRedemptionsTask()).run({})) as { + selected: number; + processed: number; + }; + console.log(`selected ${out.selected} filing(s); processed ${out.processed}`); + }); } diff --git a/src/storage/spac/SpacRepo.ts b/src/storage/spac/SpacRepo.ts index ca16be5..259f530 100644 --- a/src/storage/spac/SpacRepo.ts +++ b/src/storage/spac/SpacRepo.ts @@ -49,6 +49,11 @@ export class SpacRepo { return (await this.spacRepository.query({ status })) || []; } + /** Every spac row, regardless of status. */ + async getAllSpacs(): Promise { + return (await this.spacRepository.getAll()) || []; + } + async saveDeal(deal: SpacDeal): Promise { await this.dealRepository.put(deal); } diff --git a/src/task/spac/BackfillRedemptionsTask.test.ts b/src/task/spac/BackfillRedemptionsTask.test.ts new file mode 100644 index 0000000..92d1927 --- /dev/null +++ b/src/task/spac/BackfillRedemptionsTask.test.ts @@ -0,0 +1,89 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { beforeEach, describe, expect, it } from "bun:test"; +import { globalServiceRegistry } from "workglow"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; +import { SpacReportWriter } from "../../storage/spac/SpacReportWriter"; +import { + BackfillRedemptionsTask, + selectRedemptionBackfillAccessions, +} from "./BackfillRedemptionsTask"; + +async function seedSpac(cik: number): Promise { + await new SpacReportWriter().recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Backfill SPAC Inc.", + spac_sic: 6770, + }); +} + +async function seedFiling(opts: { + readonly cik: number; + readonly accession_number: string; + readonly form: string; + readonly items: string; +}): Promise { + const repo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN); + await repo.put({ + cik: opts.cik, + accession_number: opts.accession_number, + form: opts.form, + primary_doc: "primary.htm", + file_number: "", + filing_date: "2026-03-20", + acceptance_date: "2026-03-20T00:00:00.000Z", + report_date: "2026-03-19", + film_number: null, + primary_doc_description: null, + size: null, + is_xbrl: null, + is_inline_xbrl: null, + items: opts.items, + act: null, + } as never); +} + +describe("selectRedemptionBackfillAccessions", () => { + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + + async function seedFixture(): Promise { + await seedSpac(5); + await seedFiling({ cik: 5, accession_number: "acc-trigger", form: "8-K", items: "5.07" }); + await seedFiling({ cik: 5, accession_number: "acc-trigger-amend", form: "8-K/A", items: "2.01" }); + await seedFiling({ cik: 5, accession_number: "acc-2.02", form: "8-K", items: "2.02" }); + await seedFiling({ cik: 5, accession_number: "acc-10k", form: "10-K", items: "5.07" }); + // Non-SPAC cik: trigger-item 8-K, but no spac row. + await seedFiling({ cik: 6, accession_number: "acc-nonspac", form: "8-K", items: "5.07" }); + } + + it("selects known-SPAC trigger-item 8-Ks (incl. 8-K/A) only", async () => { + await seedFixture(); + + const accessions = await selectRedemptionBackfillAccessions(); + expect(accessions).toContain("acc-trigger"); + expect(accessions).toContain("acc-trigger-amend"); + expect(accessions).not.toContain("acc-2.02"); + expect(accessions).not.toContain("acc-10k"); + expect(accessions).not.toContain("acc-nonspac"); + }); + + it("dry-run reports the selected count without reprocessing", async () => { + await seedFixture(); + + const out = await new BackfillRedemptionsTask().run({ dryRun: true } as any); + expect(out.selected).toBe(2); + expect(out.processed).toBe(0); + }); +}); diff --git a/src/task/spac/BackfillRedemptionsTask.ts b/src/task/spac/BackfillRedemptionsTask.ts new file mode 100644 index 0000000..2422d26 --- /dev/null +++ b/src/task/spac/BackfillRedemptionsTask.ts @@ -0,0 +1,85 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ +import { Static, Type } from "typebox"; +import { globalServiceRegistry, IExecuteContext, Task, Workflow } from "workglow"; +import { FILING_REPOSITORY_TOKEN } from "../../storage/filing/FilingSchema"; +import { SpacRepo } from "../../storage/spac/SpacRepo"; +import { hasRedemptionTriggerItem } from "../../sec/forms/miscellaneous-filings/spac8kRedemptionTriggers"; +import { ProcessAccessionDocFormTask } from "../forms/ProcessAccessionDocFormTask"; + +/** + * Accession numbers of known-SPAC 8-Ks carrying a redemption-trigger item, + * enumerated from the bootstrapped `filing` metadata (no network discovery). + */ +export async function selectRedemptionBackfillAccessions(): Promise { + const filingRepo = globalServiceRegistry.get(FILING_REPOSITORY_TOKEN); + const spacRepo = new SpacRepo(); + const out: string[] = []; + const spacs = await spacRepo.getAllSpacs(); + for (const spac of spacs) { + const filings = (await filingRepo.query({ cik: spac.cik })) ?? []; + for (const f of filings) { + if ((f.form === "8-K" || f.form === "8-K/A") && hasRedemptionTriggerItem(f.items)) { + out.push(f.accession_number); + } + } + } + return out; +} + +const InputSchema = () => + Type.Object({ + dryRun: Type.Optional(Type.Boolean({ default: false })), + }); +export type BackfillRedemptionsTaskInput = Static>; + +const OutputSchema = () => + Type.Object({ + selected: Type.Number(), + processed: Type.Number(), + }); +type BackfillRedemptionsTaskOutput = Static>; + +/** + * Sweeps historical known-SPAC trigger-item 8-Ks and re-runs + * {@link ProcessAccessionDocFormTask} for each so the redemption extractor + * (which now escalates to the full submission and extracts) runs over filings + * that were processed before it existed. + */ +export class BackfillRedemptionsTask extends Task< + BackfillRedemptionsTaskInput, + BackfillRedemptionsTaskOutput +> { + static readonly type = "BackfillRedemptionsTask"; + static readonly category = "SEC"; + static readonly cacheable = false; + + static inputSchema() { + return InputSchema(); + } + + static outputSchema() { + return OutputSchema(); + } + + async execute( + input: BackfillRedemptionsTaskInput, + context: IExecuteContext + ): Promise { + const accessions = await selectRedemptionBackfillAccessions(); + if (input.dryRun) { + return { selected: accessions.length, processed: 0 }; + } + let processed = 0; + for (const accessionNumber of accessions) { + const wf = context.own(new Workflow()); + wf.pipe(new ProcessAccessionDocFormTask()); + await wf.run({ accessionNumber }); + processed++; + } + return { selected: accessions.length, processed }; + } +} From 16882a61e76f862c0e8eede5e3e3f2878088acd9 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 04:52:44 +0000 Subject: [PATCH 35/39] docs(sec): document redemption extraction + backfill command Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- CLAUDE.md | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index ebbf384..84037f8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -200,8 +200,7 @@ retires the 8-K path's positional merge-preserve. Only the **definitive merger** statements `DEFM14A` and `DEFM14C` emit the `proxy` event (→ `proxy_date` / `status = proxy`): a consent deal (14C) has no `8-K 5.07` vote, so the definitive 14C is its only approval-stage signal. Preliminary (`PREM14A`/`PREM14C`) and revised -(`DEFR14A`/`PRER14A`) proxies are extraction-only. Redemption actuals stay null -(post-vote 8-K, deferred) and S-4 is deferred (newco-CIK linkage). Configure the +(`DEFR14A`/`PRER14A`) proxies are extraction-only. S-4 is deferred (newco-CIK linkage). Configure the model via `SEC_MERGER_PROXY_MODEL` (default `claude-sonnet-4-6`) and an optional confidence floor via `SEC_MERGER_PROXY_CONFIDENCE_FLOOR` (falls back to the shared `SEC_S1_CONFIDENCE_FLOOR` when unset). @@ -212,6 +211,23 @@ sec extractor dead-letters merger-proxy # version-fixable extraction failures sec extractor retry-dead-letters merger-proxy ``` +**Redemption actuals** (extractor id `redemption`) are AI-extracted from a known +SPAC's post-vote 8-K narrative. When an 8-K carries item `5.07`, `2.01`, or `8.01` +for a known SPAC, ingestion escalates the fetch to the full submission `.txt` and +reads the primary document + `EX-99.x` exhibits; `processRedemption8K` records a +per-accession `spac_redemption_extraction` row, and `deriveDeals` correlates +`redemption_amount` / `redemption_shares` onto the matching `spac_deal`. The deal +column is the sole source `total_redemption_amount` sums, so redemptions are counted +once. Configure the model via `SEC_REDEMPTION_MODEL` (default `claude-sonnet-4-6`) +and an optional confidence floor via `SEC_REDEMPTION_CONFIDENCE_FLOOR` (falls back to +`SEC_S1_CONFIDENCE_FLOOR`). + +```bash +sec spac backfill-redemptions # sweep historical known-SPAC trigger 8-Ks +sec extractor dead-letters redemption # version-fixable extraction failures +sec extractor retry-dead-letters redemption +``` + ```bash sec spac report [--format json] # consolidated report sec spac history [--format json] # state-change history From 5ad4b8140136cda9b5410abdd97df3fa482fa551 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 05:12:37 +0000 Subject: [PATCH 36/39] fix(sec): pin version-status test to the full bootstrapped extractor set The `sec version CLI` status test's expected extractor list was missing `merger-proxy` and `redemption`, so `db setup` (which bootstraps all extractor ids) produced more rows than asserted. This test lives outside the feature dirs the local runs covered, so CI surfaced it. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/cli/groups/version.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cli/groups/version.test.ts b/src/cli/groups/version.test.ts index b4d994f..0641bff 100644 --- a/src/cli/groups/version.test.ts +++ b/src/cli/groups/version.test.ts @@ -64,6 +64,8 @@ describe("sec version CLI", () => { "CFPORTAL", "D", "S-1", + "merger-proxy", + "redemption", ]); const extractorRows = parsed.filter( (r: { component_kind: string }) => r.component_kind === "extractor" From a5f7a24e6e48cab3c571bf6fbb476a0914ff0b51 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 05:15:56 +0000 Subject: [PATCH 37/39] fix(sec): harden redemption extraction from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - deriveDeals: first deal's redemption window is unbounded below, so a vote-results 8-K filed before a completion-only deal's date still attaches (was dropped → total_redemption_amount under-counted). - processRedemption8K: parse/render of filer HTML is wrapped so a malformed body dead-letters the section instead of aborting the filing (restores the "extraction degrades, never aborts" invariant; mirrors the merger-proxy path). - BackfillRedemptionsTask: isolate per-filing failures so one bad 8-K doesn't abort the historical sweep. - redemption schema: minimum:0 on redemption_amount / price_per_share so a negative/sign-error value dead-letters rather than corrupting the rollup. - extractRedemption: drop a figure-less response (null shares AND null amount) so the no-redemption case doesn't persist a meaningless row. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- .../miscellaneous-filings/redemption8k.ts | 27 ++++++++++--- .../s1/extractRedemption.test.ts | 38 +++++++++++++++++++ .../s1/redemptionSchema.ts | 4 +- .../s1/sectionExtractors.ts | 2 + .../spac/spacDealGrouping.redemption.test.ts | 12 ++++++ src/storage/spac/spacDealGrouping.ts | 14 ++++--- src/task/spac/BackfillRedemptionsTask.ts | 14 +++++-- 7 files changed, 95 insertions(+), 16 deletions(-) diff --git a/src/sec/forms/miscellaneous-filings/redemption8k.ts b/src/sec/forms/miscellaneous-filings/redemption8k.ts index c49b202..701b14d 100644 --- a/src/sec/forms/miscellaneous-filings/redemption8k.ts +++ b/src/sec/forms/miscellaneous-filings/redemption8k.ts @@ -75,11 +75,28 @@ export async function processRedemption8K(args: ProcessRedemption8KArgs): Promis const model = args.model ?? (await getRedemptionModel()); const model_id = resolveModelId(model); - const { primaryHtml, exhibitsHtml } = parseEightKSubmission(form, fullSubmissionText); - const text = [primaryHtml, ...exhibitsHtml] - .map((h, i) => renderBody(h, `${form} ${accession_number} #${i}`)) - .filter((t) => t.length > 0) - .join("\n\n"); + // Parsing/rendering filer-supplied HTML must not abort the filing (its 8-K + // events and milestone deals already wrote); a malformed body dead-letters the + // section so a version bump can retry it, mirroring the merger-proxy path. + let text: string; + try { + const { primaryHtml, exhibitsHtml } = parseEightKSubmission(form, fullSubmissionText); + text = [primaryHtml, ...exhibitsHtml] + .map((h, i) => renderBody(h, `${form} ${accession_number} #${i}`)) + .filter((t) => t.length > 0) + .join("\n\n"); + } catch (err) { + await deadLetters.record({ + extractor_id: EXTRACTOR_ID, + accession_number, + section_name: REDEMPTION_SECTION, + reason_code: "PARSE_ERROR", + detail: err instanceof Error ? err.message : String(err), + failed_extractor_version: extractor_version, + source_run_id: null, + }); + return; + } const runSection = makeRunSection({ deadLetters, diff --git a/src/sec/forms/registration-statements/s1/extractRedemption.test.ts b/src/sec/forms/registration-statements/s1/extractRedemption.test.ts index 524d7fa..120f133 100644 --- a/src/sec/forms/registration-statements/s1/extractRedemption.test.ts +++ b/src/sec/forms/registration-statements/s1/extractRedemption.test.ts @@ -50,4 +50,42 @@ describe("extractRedemption", () => { const row = await extractRedemption("no redemption here", fakeS1Model()); expect(row).toBeNull(); }); + + it("returns null for a figure-less response (no shares and no amount)", async () => { + // The prompt tells the model to return confidence 0 with null fields when no + // realized redemption is present; even with a span, a row carrying neither a + // share count nor a dollar amount is not a redemption and must not persist. + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: null, + redemption_amount: null, + price_per_share: null, + confidence: 0, + source_span: "no public shares were tendered for redemption", + }, + ]); + cleanup = unregister; + const row = await extractRedemption( + "No public shares were tendered for redemption.", + fakeS1Model() + ); + expect(row).toBeNull(); + }); + + it("rejects a negative redemption amount via schema validation", async () => { + // A sign-error / hallucinated negative amount would otherwise subtract from + // total_redemption_amount; minimum:0 makes runStructured throw so the caller + // dead-letters it instead of persisting corrupt data. + const { unregister } = registerFakeStructuredProvider([ + { + redemption_shares: 100, + redemption_amount: -8_200_000, + price_per_share: null, + confidence: 0.9, + source_span: "shares redeemed", + }, + ]); + cleanup = unregister; + await expect(extractRedemption("shares redeemed", fakeS1Model())).rejects.toThrow(); + }); }); diff --git a/src/sec/forms/registration-statements/s1/redemptionSchema.ts b/src/sec/forms/registration-statements/s1/redemptionSchema.ts index d19682e..76fad55 100644 --- a/src/sec/forms/registration-statements/s1/redemptionSchema.ts +++ b/src/sec/forms/registration-statements/s1/redemptionSchema.ts @@ -13,10 +13,10 @@ export const RedemptionOutputSchema = Type.Object({ Type.Integer({ minimum: 0, description: "Shares redeemed (public shares tendered)" }) ), redemption_amount: TypeNullable( - Type.Number({ description: "Total dollars paid to redeeming holders" }) + Type.Number({ minimum: 0, description: "Total dollars paid to redeeming holders" }) ), price_per_share: TypeNullable( - Type.Number({ description: "Per-share redemption / trust value, when stated" }) + Type.Number({ minimum: 0, description: "Per-share redemption / trust value, when stated" }) ), confidence: Type.Number({ minimum: 0, maximum: 1 }), source_span: TypeNullable(Type.String()), diff --git a/src/sec/forms/registration-statements/s1/sectionExtractors.ts b/src/sec/forms/registration-statements/s1/sectionExtractors.ts index 9e6f950..bb8af56 100644 --- a/src/sec/forms/registration-statements/s1/sectionExtractors.ts +++ b/src/sec/forms/registration-statements/s1/sectionExtractors.ts @@ -263,5 +263,7 @@ export async function extractRedemption( const prompt = `${UNTRUSTED_PREAMBLE}\n\n${instructions}\n\n${wrapUntrusted(sectionText)}`; const obj = await runStructured(model, prompt, RedemptionOutputSchema); if (obj.confidence == null || obj.source_span == null) return null; + // A "no realized redemption" response carries neither figure — not a redemption. + if (obj.redemption_shares == null && obj.redemption_amount == null) return null; return obj as unknown as RedemptionRow; } diff --git a/src/storage/spac/spacDealGrouping.redemption.test.ts b/src/storage/spac/spacDealGrouping.redemption.test.ts index c5c4749..9ad5bac 100644 --- a/src/storage/spac/spacDealGrouping.redemption.test.ts +++ b/src/storage/spac/spacDealGrouping.redemption.test.ts @@ -83,4 +83,16 @@ describe("deriveDeals redemption correlation", () => { const deals = deriveDeals(1, [], [], [red("r-1", "2026-03-01", 100, 1000)], []); expect(deals).toEqual([]); }); + + it("attaches a vote-results redemption filed before a completion-only deal's date", () => { + // A SPAC whose only ingested milestone is the completion 8-K (no 1.01 DA); + // the `vote` event opens no deal, so the deal is opened solely by `completed` + // and its only date is the later outcome_date. A redemption reported at the + // vote (filed before closing) must still attach to that single deal. + const events = [ev("vote", "2026-03-19", "vote-1"), ev("completed", "2026-03-20", "close-1")]; + const deals = deriveDeals(1, events, [], [red("r-1", "2026-03-19", 400000, 4_000_000)], []); + expect(deals).toHaveLength(1); + expect(deals[0].redemption_amount).toBe(4_000_000); + expect(deals[0].redemption_shares).toBe(400000); + }); }); diff --git a/src/storage/spac/spacDealGrouping.ts b/src/storage/spac/spacDealGrouping.ts index deb32e8..4a0c10f 100644 --- a/src/storage/spac/spacDealGrouping.ts +++ b/src/storage/spac/spacDealGrouping.ts @@ -171,15 +171,19 @@ export function deriveDeals( } // --- Correlate redemption extractions onto deals by announcement window --- - // A deal owns [lower, nextLower): lower = its announced/DA/outcome date, upper - // = the next deal's same lower bound. Unlike the merger window this ignores - // outcome_date for the upper bound, so a redemption reported at/after closing - // still attaches to the deal being closed. + // The deals contiguously partition the timeline: deal i owns [B(i-1), B(i)), + // where B(k) is the boundary between deal k and deal k+1 = the next deal's + // earliest date (announced/DA/outcome). The first deal's lower bound is + // unbounded (B(-1) = null) so a redemption reported before the first recorded + // deal date — e.g. a vote-results 8-K for a deal opened only by `completed` + // (no 1.01), whose only date is its later outcome_date — still attaches. + // Unlike the merger window this ignores outcome_date for the upper bound, so a + // redemption reported at/after closing still attaches to the deal being closed. const dealLower = (d: DealSkeleton): string | null => d.announced_date ?? d.definitive_agreement_date ?? d.outcome_date ?? null; for (let i = 0; i < skeletons.length; i++) { const d = skeletons[i]; - const lower = dealLower(d); + const lower = i === 0 ? null : dealLower(d); const upper = skeletons[i + 1] ? dealLower(skeletons[i + 1]) : null; const matched = redemptionExtractions .filter( diff --git a/src/task/spac/BackfillRedemptionsTask.ts b/src/task/spac/BackfillRedemptionsTask.ts index 2422d26..46822e6 100644 --- a/src/task/spac/BackfillRedemptionsTask.ts +++ b/src/task/spac/BackfillRedemptionsTask.ts @@ -73,12 +73,18 @@ export class BackfillRedemptionsTask extends Task< if (input.dryRun) { return { selected: accessions.length, processed: 0 }; } + // Isolate per-filing failures: one bad 8-K (fetch error, malformed body) + // must not abort the sweep over the remaining accessions. let processed = 0; for (const accessionNumber of accessions) { - const wf = context.own(new Workflow()); - wf.pipe(new ProcessAccessionDocFormTask()); - await wf.run({ accessionNumber }); - processed++; + try { + const wf = context.own(new Workflow()); + wf.pipe(new ProcessAccessionDocFormTask()); + await wf.run({ accessionNumber }); + processed++; + } catch (err) { + console.error(`backfill-redemptions: failed to reprocess ${accessionNumber}:`, err); + } } return { selected: accessions.length, processed }; } From 61bce6b06f670afdbc154bf05ccf84fb89598f16 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 05:48:37 +0000 Subject: [PATCH 38/39] fix(spac): reconcile stale deal rows on recompute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recomputeAndSaveDeals upserted derived deals but never deleted rows from a prior, larger derivation. When the derived deal set shrinks (event stream or derivation logic changes), orphaned spac_deal rows survived and their stale columns — notably redemption_amount — were still summed by sumRedemptions, over-counting total_redemption_amount. Now delete any existing deal whose deal_index is absent from the recomputed set before saving. Shared by the 8-K milestone, merger-proxy, and redemption writers. Adds SpacRepo.deleteDeal. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- src/storage/spac/SpacRepo.ts | 4 + src/storage/spac/SpacReportWriter.ts | 10 +++ .../spac/recomputeDeals.reconcile.test.ts | 86 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 src/storage/spac/recomputeDeals.reconcile.test.ts diff --git a/src/storage/spac/SpacRepo.ts b/src/storage/spac/SpacRepo.ts index 259f530..5557c06 100644 --- a/src/storage/spac/SpacRepo.ts +++ b/src/storage/spac/SpacRepo.ts @@ -58,6 +58,10 @@ export class SpacRepo { await this.dealRepository.put(deal); } + async deleteDeal(cik: number, deal_index: number): Promise { + await this.dealRepository.delete({ cik, deal_index }); + } + /** Deals for a CIK, ascending by deal_index. */ async getDeals(cik: number): Promise { const rows = (await this.dealRepository.query({ cik })) || []; diff --git a/src/storage/spac/SpacReportWriter.ts b/src/storage/spac/SpacReportWriter.ts index b969f11..50ff32c 100644 --- a/src/storage/spac/SpacReportWriter.ts +++ b/src/storage/spac/SpacReportWriter.ts @@ -187,6 +187,16 @@ export class SpacReportWriter { this.repo.getDeals(cik), ]); const deals = deriveDeals(cik, events, extractions, redemptions, existingDeals); + // Reconcile: if a prior derivation yielded more deals than this one (the + // event stream or derivation logic changed), delete the orphaned rows. + // saveDeal only upserts, so without this their stale columns — notably + // redemption_amount — would still be summed into the rolled-up totals. + const liveIndexes = new Set(deals.map((d) => d.deal_index)); + for (const existing of existingDeals) { + if (!liveIndexes.has(existing.deal_index)) { + await this.repo.deleteDeal(existing.cik, existing.deal_index); + } + } for (const deal of deals) await this.repo.saveDeal(deal); } diff --git a/src/storage/spac/recomputeDeals.reconcile.test.ts b/src/storage/spac/recomputeDeals.reconcile.test.ts new file mode 100644 index 0000000..fd03ba8 --- /dev/null +++ b/src/storage/spac/recomputeDeals.reconcile.test.ts @@ -0,0 +1,86 @@ +/** + * @license + * Copyright 2026 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { beforeEach, describe, expect, it } from "bun:test"; +import { resetDependencyInjectionsForTesting } from "../../config/TestingDI"; +import { setupAllDatabases } from "../../config/setupAllDatabases"; +import { SpacRepo } from "./SpacRepo"; +import { SpacReportWriter } from "./SpacReportWriter"; +import type { SpacDeal } from "./SpacDealSchema"; + +/** A stale orphan deal row carrying a redemption amount, as a prior larger derivation would leave. */ +const orphanDeal = (cik: number, deal_index: number): SpacDeal => ({ + cik, + deal_index, + target_name: null, + target_cik: null, + announced_date: null, + definitive_agreement_date: null, + proxy_date: null, + vote_date: null, + pipe_amount: null, + redemption_amount: 999_999, + redemption_shares: 1, + outcome: "completed", + outcome_date: "2026-09-09", + source_accession: "orphan", + created_at: "2026-01-01T00:00:00.000Z", +}); + +describe("recomputeAndSaveDeals reconciliation", () => { + beforeEach(async () => { + resetDependencyInjectionsForTesting(); + await setupAllDatabases(); + }); + + it("deletes deal rows no longer in the derived set so the rollup drops their amounts", async () => { + const cik = 555; + const writer = new SpacReportWriter(); + await writer.recordRegistration({ + cik, + accession_number: `${cik}-reg`, + filing_date: "2025-12-01", + form: "S-1", + primary_document: "s1.htm", + spac_name: "Reconcile SPAC Inc.", + spac_sic: 6770, + }); + // A two-event stream derives exactly one deal (deal_index 0). + await writer.recordDealMilestones({ + cik, + accession_number: `${cik}-da`, + filing_date: "2026-01-10", + form: "8-K", + primary_document: null, + events: [ + { event_type: "definitive_agreement", event_date: "2026-01-10" }, + { event_type: "completed", event_date: "2026-03-20" }, + ], + }); + const repo = new SpacRepo(); + expect((await repo.getDeals(cik)).map((d) => d.deal_index)).toEqual([0]); + + // Inject a stale orphan (deal_index 1) as a prior, larger derivation would have left. + await repo.saveDeal(orphanDeal(cik, 1)); + expect((await repo.getDeals(cik)).length).toBe(2); + + // Any recompute must reconcile away the orphan. recordRedemption appends no + // event — it only recomputes deals + rebuilds the row. + await writer.recordRedemption({ + cik, + accession_number: `${cik}-redemption`, + filing_date: "2026-03-21", + form: "8-K", + }); + + const after = await repo.getDeals(cik); + expect(after.map((d) => d.deal_index)).toEqual([0]); + // The orphan's 999,999 is no longer summed into the rollup. + const spac = await repo.getSpac(cik); + expect(spac).toBeDefined(); + expect(spac?.total_redemption_amount ?? null).toBeNull(); + }); +}); From 99f0f7560cb3514673404d1969ed3561b247d51b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 06:00:43 +0000 Subject: [PATCH 39/39] =?UTF-8?q?fix(sec):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20test=20runner,=20docs,=20backfill=20query?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - parseEightKSubmission.test.ts: use bun:test (repo convention; was the only file importing vitest). - sectionRunner parseConfidenceFloor: correct the JSDoc — a 0 floor admits every row (disables the floor); NaN is the drop-everything case. - BackfillRedemptionsTask: query filings by (form, cik) to use the ["form","cik"] index instead of loading all of a SPAC's filings and filtering. - CLAUDE.md: the milestone grouping function is deriveDeals, not the stale deriveDealsFromEvents reference. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01X3vG1nXpbisZQuoduW7Z1f --- CLAUDE.md | 2 +- .../s1/parseEightKSubmission.test.ts | 2 +- .../registration-statements/s1/sectionRunner.ts | 5 +++-- src/task/spac/BackfillRedemptionsTask.ts | 12 ++++++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 84037f8..7dca3e2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -179,7 +179,7 @@ The IPO half is populated from S-1/DRS (`registration`) and priced 424B1/424B4 (`ipo`). De-SPAC **milestone dates** are populated deterministically from 8-K item codes (known SPACs only — a `spac` row must already exist): item `1.01` → `definitive_agreement`, `1.02` → `terminated`, `2.01` → `completed`, `5.07` → -`vote`. These group into `spac_deal` attempts via `deriveDealsFromEvents` +`vote`. These group into `spac_deal` attempts via `deriveDeals` (recomputed from the event stream on every write, so `deal_index` is stable across replays) and roll up automatically. `target_name`, `pipe_amount`, and redemption amounts stay null until the narrative/AI extractors (S-4 / DEFM14A / diff --git a/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts index 0ab9751..64854d1 100644 --- a/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts +++ b/src/sec/forms/registration-statements/s1/parseEightKSubmission.test.ts @@ -3,7 +3,7 @@ * Copyright 2026 Steven Roussey * SPDX-License-Identifier: Apache-2.0 */ -import { describe, expect, it } from "vitest"; +import { describe, expect, it } from "bun:test"; import { parseEightKSubmission } from "./parseSubmission"; const wrap = (docs: string): string => diff --git a/src/sec/forms/registration-statements/s1/sectionRunner.ts b/src/sec/forms/registration-statements/s1/sectionRunner.ts index b4daa34..b923411 100644 --- a/src/sec/forms/registration-statements/s1/sectionRunner.ts +++ b/src/sec/forms/registration-statements/s1/sectionRunner.ts @@ -8,8 +8,9 @@ import type { ExtractionDeadLetterRepo } from "../../../../storage/dead-letter/E /** * Parse a confidence-floor env value. Undefined, empty, or non-numeric input - * (which `Number` would turn into `0` or `NaN`, silently dropping every row) - * falls back to `fallback`. + * falls back to `fallback` — `Number` would otherwise coerce these to `0` + * (disabling the floor, admitting every row) or `NaN` (which, since + * `confidence >= NaN` is always false, silently drops every row). */ export function parseConfidenceFloor(raw: string | undefined, fallback: number): number { if (raw === undefined || raw.trim() === "") return fallback; diff --git a/src/task/spac/BackfillRedemptionsTask.ts b/src/task/spac/BackfillRedemptionsTask.ts index 46822e6..1ae3809 100644 --- a/src/task/spac/BackfillRedemptionsTask.ts +++ b/src/task/spac/BackfillRedemptionsTask.ts @@ -20,10 +20,14 @@ export async function selectRedemptionBackfillAccessions(): Promise { const out: string[] = []; const spacs = await spacRepo.getAllSpacs(); for (const spac of spacs) { - const filings = (await filingRepo.query({ cik: spac.cik })) ?? []; - for (const f of filings) { - if ((f.form === "8-K" || f.form === "8-K/A") && hasRedemptionTriggerItem(f.items)) { - out.push(f.accession_number); + // Query by (form, cik) — the filings storage is indexed on ["form", "cik"], + // so this loads only the SPAC's 8-Ks instead of scanning all its filings. + for (const form of ["8-K", "8-K/A"]) { + const filings = (await filingRepo.query({ form, cik: spac.cik })) ?? []; + for (const f of filings) { + if (hasRedemptionTriggerItem(f.items)) { + out.push(f.accession_number); + } } } }