From 7d95eb191d2fb0a7b5d0dc2e50270311cc7d5a4d Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Thu, 18 Jun 2026 00:40:31 +0100 Subject: [PATCH 1/9] engine: zero-config local mode (engine --local / VLE_LOCAL_MODE) on :7654 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a local mode that moves the base config to zero-setup defaults BEFORE the file/env layers (which still override): listen on :7654, a localhost Postgres URL matching the bundled/dev database, local file storage, and the Postgres-backed river queue (no Redis needed). The engine then boots with no required configuration — closing the 'database.url is required' gap that otherwise blocks a bare run. - --local flag sets VLE_LOCAL_MODE=true so CLI + Docker (env) share one path. - Adds VLE_STORAGE_LOCAL_ROOT binding for the image's data volume. - cmd/engine is already unauthenticated (single tenant), so local-mode auth needs no extra wiring; documented as dev/local only. - Documented in config.example.yaml; tests cover defaults, truthy forms, env-override precedence, and the non-local missing-DB-URL failure. Foundation for the all-in-one image (HAL-185) + local dashboard (HAL-188). Closes HAL-186. 
--- cmd/engine/main.go | 9 +++ config.example.yaml | 13 +++++ pkg/config/config.go | 57 +++++++++++++++++++ pkg/config/config_local_test.go | 99 +++++++++++++++++++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 pkg/config/config_local_test.go diff --git a/cmd/engine/main.go b/cmd/engine/main.go index e2048ed..bd447b0 100644 --- a/cmd/engine/main.go +++ b/cmd/engine/main.go @@ -46,8 +46,16 @@ func main() { func run() error { configPath := flag.String("config", "", "path to config.yaml (optional; env vars take precedence)") + localMode := flag.Bool("local", false, "zero-config local mode: localhost Postgres, local storage, listen on :7654, no setup (sets VLE_LOCAL_MODE)") flag.Parse() + // --local is sugar for VLE_LOCAL_MODE=true so the CLI flag and the env + // var (used by the all-in-one Docker image) flow through one path in + // config.Load. Set it before Load reads the environment. + if *localMode { + _ = os.Setenv("VLE_LOCAL_MODE", "true") + } + cfg, err := config.Load(*configPath) if err != nil { return fmt.Errorf("load config: %w", err) @@ -56,6 +64,7 @@ func run() error { logger := newLogger(cfg.Log) logger.Info("starting vectorless-engine", "version", version, + "local_mode", config.LocalModeEnabled(), "storage_driver", cfg.Storage.Driver, "queue_driver", cfg.Queue.Driver, "llm_driver", cfg.LLM.Driver, diff --git a/config.example.yaml b/config.example.yaml index 977fe24..141b165 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -13,6 +13,19 @@ # # `vectorless-engine config print` prints the effective config with secrets # redacted; `vectorless-engine config check` validates it and exits 0/1. 
+# +# ZERO-CONFIG LOCAL MODE +# ---------------------- +# Run `engine --local` (or set VLE_LOCAL_MODE=true) to boot with no config +# at all: it listens on :7654, points at a localhost Postgres +# (postgres://vectorless:vectorless@localhost:5432/vectorless), uses local +# file storage and the Postgres-backed river queue, and requires no API key +# to call the engine. This matches the all-in-one Docker image, where +# Postgres is bundled in the same container. You still supply an LLM +# provider key (e.g. VLE_LLM_ANTHROPIC_API_KEY) for ingestion + retrieval. +# Override any local default with the usual env/flags, e.g. +# VLE_SERVER_ADDR=:9000 or VLE_STORAGE_LOCAL_ROOT=/data/documents. +# Local mode is for dev/local use — do NOT expose it to the public internet. server: addr: ":8080" diff --git a/pkg/config/config.go b/pkg/config/config.go index 70cf5e1..3a3dab1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -790,10 +790,64 @@ func Default() Config { } } +// localDefaultAddr is the canonical local-mode listen address. The whole +// product (engine API + bundled dashboard) is reachable at +// localhost:7654 so every doc, link, and quickstart can say one number. +const localDefaultAddr = ":7654" + +// defaultLocalDatabaseURL is the Postgres DSN local mode assumes when no +// URL is configured. It matches the bundled Postgres in the all-in-one +// Docker image and the dev docker-compose (user/pass/db all "vectorless" +// on localhost:5432), so a bare `engine --local` next to those services +// just connects. +const defaultLocalDatabaseURL = "postgres://vectorless:vectorless@localhost:5432/vectorless?sslmode=disable" + +// LocalModeEnabled reports whether zero-config local mode was requested +// via the VLE_LOCAL_MODE env var (truthy: 1/true/yes/on). The engine's +// --local flag sets this var before Load runs, so the CLI flag and the +// env var (used by the Docker image) share one code path. 
+func LocalModeEnabled() bool { + switch strings.ToLower(strings.TrimSpace(os.Getenv("VLE_LOCAL_MODE"))) { + case "1", "true", "yes", "on": + return true + } + return false +} + +// applyLocalDefaults rewrites the base config for zero-config local +// running: the canonical :7654 port, a localhost Postgres URL matching +// the bundled/dev database, local file storage, and the Postgres-backed +// river queue (no Redis required). It runs on the Default() base BEFORE +// the YAML file and env overrides are applied, so any value the operator +// sets explicitly still wins — local mode only moves the starting point +// so the engine boots with no required configuration. +// +// Auth: the standalone engine (cmd/engine) is already unauthenticated — +// it serves a single logical tenant with no API key — so "local-mode +// auth" needs no extra wiring here. This is dev/local only and must not +// be exposed to the public internet. +func applyLocalDefaults(c *Config) { + c.Server.Addr = localDefaultAddr + c.Database.URL = defaultLocalDatabaseURL + c.Storage.Driver = "local" + if c.Storage.Local.Root == "" { + c.Storage.Local.Root = "./data/documents" + } + c.Queue.Driver = "river" +} + // Load reads configuration from a YAML file (optional) and applies // environment overrides on top. Pass an empty path to skip the file. +// +// When VLE_LOCAL_MODE is truthy (or the engine is run with --local, which +// sets it), zero-config local defaults are applied to the base before the +// file/env layers, so the engine boots on :7654 against a localhost +// Postgres with no required configuration. File and env still override. 
func Load(path string) (Config, error) { cfg := Default() + if LocalModeEnabled() { + applyLocalDefaults(&cfg) + } if path != "" { data, err := os.ReadFile(path) if err != nil { @@ -869,6 +923,9 @@ func applyEnvOverrides(c *Config) { if v := os.Getenv("VLE_STORAGE_DRIVER"); v != "" { c.Storage.Driver = v } + if v := os.Getenv("VLE_STORAGE_LOCAL_ROOT"); v != "" { + c.Storage.Local.Root = v + } if v := os.Getenv("VLE_QUEUE_DRIVER"); v != "" { c.Queue.Driver = v } diff --git a/pkg/config/config_local_test.go b/pkg/config/config_local_test.go new file mode 100644 index 0000000..5c12f4b --- /dev/null +++ b/pkg/config/config_local_test.go @@ -0,0 +1,99 @@ +package config + +import "testing" + +// TestLocalModeDefaults: with VLE_LOCAL_MODE set, Load with no file and no +// other env boots a complete, valid config — :7654, a localhost Postgres +// URL, local storage, river queue — with nothing else required. +func TestLocalModeDefaults(t *testing.T) { + t.Setenv("VLE_LOCAL_MODE", "true") + + cfg, err := Load("") + if err != nil { + t.Fatalf("local-mode Load() with no other config should succeed, got: %v", err) + } + if cfg.Server.Addr != ":7654" { + t.Errorf("local mode server.addr = %q, want :7654", cfg.Server.Addr) + } + if cfg.Database.URL != defaultLocalDatabaseURL { + t.Errorf("local mode database.url = %q, want %q", cfg.Database.URL, defaultLocalDatabaseURL) + } + if cfg.Storage.Driver != "local" { + t.Errorf("local mode storage.driver = %q, want local", cfg.Storage.Driver) + } + if cfg.Queue.Driver != "river" { + t.Errorf("local mode queue.driver = %q, want river", cfg.Queue.Driver) + } + if cfg.Storage.Local.Root == "" { + t.Error("local mode storage.local.root must be set") + } +} + +// TestLocalModeTruthyForms: the env flag accepts the usual truthy spellings +// and ignores everything else. 
+func TestLocalModeTruthyForms(t *testing.T) { + for _, v := range []string{"1", "true", "TRUE", "yes", "on"} { + t.Setenv("VLE_LOCAL_MODE", v) + if !LocalModeEnabled() { + t.Errorf("VLE_LOCAL_MODE=%q should enable local mode", v) + } + } + for _, v := range []string{"", "0", "false", "no", "off", "nope"} { + t.Setenv("VLE_LOCAL_MODE", v) + if LocalModeEnabled() { + t.Errorf("VLE_LOCAL_MODE=%q should NOT enable local mode", v) + } + } +} + +// TestLocalModeEnvOverridesWin: local mode only moves the starting point — +// explicit env values still override the local defaults. +func TestLocalModeEnvOverridesWin(t *testing.T) { + t.Setenv("VLE_LOCAL_MODE", "true") + t.Setenv("VLE_SERVER_ADDR", ":9999") + t.Setenv("VLE_DATABASE_URL", "postgres://custom:custom@db:5432/custom?sslmode=disable") + t.Setenv("VLE_STORAGE_LOCAL_ROOT", "/srv/docs") + + cfg, err := Load("") + if err != nil { + t.Fatalf("Load() failed: %v", err) + } + if cfg.Server.Addr != ":9999" { + t.Errorf("env should override local addr: got %q, want :9999", cfg.Server.Addr) + } + if cfg.Database.URL != "postgres://custom:custom@db:5432/custom?sslmode=disable" { + t.Errorf("env should override local db url, got %q", cfg.Database.URL) + } + if cfg.Storage.Local.Root != "/srv/docs" { + t.Errorf("VLE_STORAGE_LOCAL_ROOT should set storage root, got %q", cfg.Storage.Local.Root) + } +} + +// TestNonLocalModeUnchanged: without the flag the historical defaults hold +// (:8080), and the engine still requires a database URL for the river +// queue — i.e. local mode is the ONLY thing that injects one. +func TestNonLocalModeUnchanged(t *testing.T) { + t.Setenv("VLE_LOCAL_MODE", "") + // Provide a DB URL so validation passes for the river default. 
+ t.Setenv("VLE_DATABASE_URL", "postgres://x:x@localhost:5432/x?sslmode=disable") + + cfg, err := Load("") + if err != nil { + t.Fatalf("Load() failed: %v", err) + } + if cfg.Server.Addr != ":8080" { + t.Errorf("non-local addr = %q, want :8080", cfg.Server.Addr) + } +} + +// TestNonLocalModeMissingDBURLFails proves the local-mode injection is what +// removes the "no required config" gap: without it and without a DB URL, +// the river queue fails validation. +func TestNonLocalModeMissingDBURLFails(t *testing.T) { + t.Setenv("VLE_LOCAL_MODE", "") + t.Setenv("VLE_DATABASE_URL", "") + + if _, err := Load(""); err == nil { + t.Fatal("expected validation error for river queue with no database.url, got nil") + } +} From ee92a5d2349274450f6f80649bc74a5343f680ed Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Thu, 18 Jun 2026 15:20:32 +0100 Subject: [PATCH 2/9] docs: align documented local Postgres URL with the real default (?sslmode=disable) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CodeRabbit review on #42 — the example referenced the DSN without the sslmode param that applyLocalDefaults actually injects. --- config.example.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.example.yaml b/config.example.yaml index 141b165..61a492b 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -18,7 +18,7 @@ # ---------------------- # Run `engine --local` (or set VLE_LOCAL_MODE=true) to boot with no config # at all: it listens on :7654, points at a localhost Postgres -# (postgres://vectorless:vectorless@localhost:5432/vectorless), uses local +# (postgres://vectorless:vectorless@localhost:5432/vectorless?sslmode=disable), uses local # file storage and the Postgres-backed river queue, and requires no API key # to call the engine. This matches the all-in-one Docker image, where # Postgres is bundled in the same container. 
You still supply an LLM From 47c57939603bbb8f95155380e3c3bab9ed4cca53 Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 01:23:44 +0100 Subject: [PATCH 3/9] engine: fsync source writes + retry source fetch on ErrNotFound (HAL-319) The ingest worker could race the just-written source object (Local.Put did no fsync; River picks the job up within microseconds), failing with 'parse: fetch source: storage: object not found' and marking the doc failed. Local.Put now fsyncs before returning and the source fetch retries on ErrNotFound with short backoff. --- pkg/ingest/ingest.go | 30 +++++++++++++++++++++++++++++- pkg/storage/local.go | 19 ++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/pkg/ingest/ingest.go b/pkg/ingest/ingest.go index 8f24105..9b26243 100644 --- a/pkg/ingest/ingest.go +++ b/pkg/ingest/ingest.go @@ -540,7 +540,7 @@ func runParallelStages(ctx context.Context, summarizeFn, hydeFn func(context.Con } func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Payload) (*parser.ParsedDoc, error) { - rc, _, err := p.Storage.Get(ctx, pl.SourceRef) + rc, _, err := getSourceWithRetry(ctx, p.Storage, pl.SourceRef) if err != nil { return nil, fmt.Errorf("fetch source: %w", err) } @@ -548,6 +548,34 @@ func (p *Pipeline) parse(ctx context.Context, parsers *parser.Registry, pl Paylo return parsers.Parse(ctx, pl.ContentType, pl.Filename, rc) } +// getSourceWithRetry fetches a freshly-uploaded object, tolerating the +// brief window where the background ingest job (enqueued right after the +// upload handler's Storage.Put) outraces the source bytes becoming +// visible. Storage.Put now fsyncs, so this is belt-and-suspenders for +// slower or eventually-consistent backends: a transient ErrNotFound is +// retried with short backoff rather than failing the whole document. +// Any non-ErrNotFound error returns immediately. 
+func getSourceWithRetry(ctx context.Context, s storage.Storage, key string) (io.ReadCloser, storage.Metadata, error) { + const attempts = 6 + var lastErr error + for i := 0; i < attempts; i++ { + rc, meta, err := s.Get(ctx, key) + if err == nil { + return rc, meta, nil + } + if !errors.Is(err, storage.ErrNotFound) || i == attempts-1 { + return nil, storage.Metadata{}, err // not retryable, or retries exhausted: return now rather than sleeping through one more backoff + } + lastErr = err + select { + case <-ctx.Done(): + return nil, storage.Metadata{}, ctx.Err() + case <-time.After(time.Duration(i+1) * 150 * time.Millisecond): + } + } + return nil, storage.Metadata{}, lastErr +} + // runMinimal is the fast/minimal ingest path: parse → build tree → // persist → ready. It does ZERO LLM work — no summarize, no HyDE, no // multi-axis summaries, no TOC build — and parses with table extraction diff --git a/pkg/storage/local.go b/pkg/storage/local.go index 440c767..147fd0d 100644 --- a/pkg/storage/local.go +++ b/pkg/storage/local.go @@ -39,9 +39,22 @@ func (l *Local) Put(ctx context.Context, key string, r io.Reader, _ Metadata) er if err != nil { return err } - defer func() { _ = f.Close() }() // best-effort close - _, err = io.Copy(f, r) - return err + if _, err := io.Copy(f, r); err != nil { + _ = f.Close() + return err + } + // fsync before returning. Ingest enqueues the background parse job + // immediately after Put returns; the worker may pick it up within + // microseconds and Stat this exact path. Without the sync the bytes + // (and on Windows the directory entry) can lag behind, so the worker + // races the write and fails with ErrNotFound on a file that is in + // fact being written. Syncing here makes the object durably visible + // before the caller proceeds to enqueue.
+ if err := f.Sync(); err != nil { + _ = f.Close() + return err + } + return f.Close() } func (l *Local) Get(ctx context.Context, key string) (io.ReadCloser, Metadata, error) { From 9644ecc0bb4533964a8c18d3f4821d3ab400a1d3 Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 01:23:45 +0100 Subject: [PATCH 4/9] config: document that anthropic base_url must include /v1 for GLM/Z.ai gateways (HAL-318) --- internal/config/config.go | 6 +++--- pkg/config/config.go | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 53d2950..72000ba 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -346,9 +346,9 @@ func applyEnvOverrides(c *Config) { } } // Anthropic-compatible gateway overrides (e.g. GLM/Zhipu via - // https://api.z.ai/api/anthropic): base URL + model, so the - // anthropic driver can run a non-Anthropic model without a secret - // edit. + // https://api.z.ai/api/anthropic/v1 — the /v1 is required, the client + // posts to ${base}/messages): base URL + model, so the anthropic + // driver can run a non-Anthropic model without a secret edit. if v := firstEnv("VLS_LLM_ANTHROPIC_BASE_URL", "VLE_LLM_ANTHROPIC_BASE_URL"); v != "" { c.Engine.LLM.Anthropic.BaseURL = v } diff --git a/pkg/config/config.go b/pkg/config/config.go index 3a3dab1..12dfe0c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -386,8 +386,15 @@ type AnthropicBlock struct { // BaseURL overrides the Anthropic API endpoint. Empty = official // api.anthropic.com. Set this to point the Anthropic driver at any // Anthropic-compatible gateway — e.g. GLM/Zhipu's - // https://api.z.ai/api/anthropic — so the same driver can drive a + // https://api.z.ai/api/anthropic/v1 — so the same driver can drive a // non-Anthropic model that speaks the Messages API. + // + // IMPORTANT: the value must include the API version segment (.../v1). 
+ // The underlying client posts to "${base_url}/messages" (its built-in + // default is https://api.anthropic.com/v1), so a base_url WITHOUT /v1 + // resolves to .../anthropic/messages — which z.ai answers with an + // HTTP 200 body {"code":500,"msg":"404 NOT_FOUND"} and the engine then + // reports the opaque "anthropic: no response". BaseURL string `yaml:"base_url"` } @@ -934,7 +941,8 @@ func applyEnvOverrides(c *Config) { } // Anthropic-driver overrides. These let an operator point the // anthropic driver at an Anthropic-compatible gateway (e.g. GLM via - // https://api.z.ai/api/anthropic) without baking the values into the + // https://api.z.ai/api/anthropic/v1 — the /v1 is required; see + // AnthropicBlock.BaseURL) without baking the values into the // config file or secret. if v := os.Getenv("VLE_LLM_ANTHROPIC_API_KEY"); v != "" { c.LLM.Anthropic.APIKey = v From b1fd72871e59ca70b41ef4274964d14197f070ff Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 01:23:46 +0100 Subject: [PATCH 5/9] engine: add GET /v1/documents/{id}/source + all-in-one Docker image (HAL-188, HAL-185) New GET /v1/documents/{id}/source streams the original bytes (for PDF page previews in clients). Dockerfile.allinone bundles the engine + Postgres + the local web UI into one image; docker-allinone.yml publishes it to Docker Hub + GHCR. 
--- .github/workflows/docker-allinone.yml | 73 +++++ Dockerfile.allinone | 69 +++++ deploy/allinone/entrypoint.sh | 36 +++ internal/api/server.go | 45 +++ localapp/README.md | 44 +++ localapp/index.html | 418 ++++++++++++++++++++++++++ localapp/serve.py | 144 +++++++++ 7 files changed, 829 insertions(+) create mode 100644 .github/workflows/docker-allinone.yml create mode 100644 Dockerfile.allinone create mode 100644 deploy/allinone/entrypoint.sh create mode 100644 localapp/README.md create mode 100644 localapp/index.html create mode 100644 localapp/serve.py diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml new file mode 100644 index 0000000..eb6818c --- /dev/null +++ b/.github/workflows/docker-allinone.yml @@ -0,0 +1,73 @@ +name: docker-allinone + +# Build and publish the ALL-IN-ONE image (engine + bundled Postgres + web UI) +# so anyone can `docker run` Vectorless with just an LLM key. +# +# Publishes to Docker Hub AND GitHub Container Registry: +# docker.io//vectorless:latest|sha-|vX.Y.Z +# ghcr.io/hallelx2/vectorless:latest|sha-|vX.Y.Z +# +# Requires two repo secrets for the Docker Hub push: +# DOCKERHUB_USERNAME — your Docker Hub account/namespace +# DOCKERHUB_TOKEN — a Docker Hub access token with Read/Write/Delete scope +# (GHCR uses the built-in GITHUB_TOKEN — no extra secret.) 
+ +on: + workflow_dispatch: {} # run on demand from the Actions tab / gh CLI + push: + tags: ["v*.*.*"] + +permissions: + contents: read + packages: write # push to ghcr.io + +jobs: + publish: + name: build + push all-in-one + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to ghcr.io + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract tags + labels + id: meta + uses: docker/metadata-action@v5 + with: + images: | + docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless + ghcr.io/${{ github.repository_owner }}/vectorless + tags: | + type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} + type=ref,event=tag + type=sha,prefix=sha-,format=short + + - name: Build + push + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile.allinone + platforms: linux/amd64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + VERSION=${{ github.ref_name }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile.allinone b/Dockerfile.allinone new file mode 100644 index 0000000..21c82f3 --- /dev/null +++ b/Dockerfile.allinone @@ -0,0 +1,69 @@ +# ── All-in-one image: engine + bundled Postgres + viewer UI ────────── +# +# One `docker run` gives a fully working Vectorless: the retrieval engine, +# a Postgres instance bundled in the same container, and the local web UI. +# The only thing the user supplies is an LLM provider key. 
+# +# docker run -p 8080:8080 -p 7654:7654 \ +# -e VLE_LLM_ANTHROPIC_API_KEY= \ +# hallelx2/vectorless:latest +# # → UI: http://localhost:8080 +# # → API: http://localhost:7654 +# +# Context: vectorless-engine/ directory. + +# ── Build stage ────────────────────────────────────────────────────── +FROM golang:1.25-alpine AS build +RUN apk add --no-cache ca-certificates +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY cmd/ ./cmd/ +COPY pkg/ ./pkg/ +COPY internal/ ./internal/ +ARG VERSION=dev +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -trimpath -ldflags="-s -w -X main.version=${VERSION}" \ + -o /bin/engine ./cmd/engine + +# ── Runtime stage: Postgres base + python + engine + viewer ────────── +FROM postgres:16-bookworm + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /bin/engine /usr/local/bin/engine +COPY localapp/ /opt/vectorless-app/ +COPY deploy/allinone/entrypoint.sh /usr/local/bin/vl-entrypoint.sh +RUN chmod +x /usr/local/bin/vl-entrypoint.sh + +# Bundled Postgres credentials — must match engine --local's expected DSN +# (postgres://vectorless:vectorless@localhost:5432/vectorless). +ENV POSTGRES_USER=vectorless \ + POSTGRES_PASSWORD=vectorless \ + POSTGRES_DB=vectorless + +# Engine defaults: local mode, minimal ingest (fast, queryable in seconds), +# document bytes under /data (mount a volume here to persist), and GLM via +# z.ai's Anthropic-compatible gateway out of the box. Override any of these +# with -e at runtime; the user still supplies VLE_LLM_ANTHROPIC_API_KEY. 
+ENV VLE_INGEST_MODE=minimal \ + VLE_STORAGE_LOCAL_ROOT=/data/documents \ + VLE_LLM_DRIVER=anthropic \ + VLE_LLM_ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic/v1 \ + VLE_LLM_ANTHROPIC_MODEL=glm-4.6 \ + VIEWER_PORT=8080 \ + ENGINE_URL=http://localhost:7654 \ + HOST=0.0.0.0 + +EXPOSE 8080 7654 +VOLUME ["/data", "/var/lib/postgresql/data"] + +ENTRYPOINT ["/usr/local/bin/vl-entrypoint.sh"] + +LABEL org.opencontainers.image.title="vectorless (all-in-one)" +LABEL org.opencontainers.image.description="Vectorless retrieval engine + bundled Postgres + web UI in one container. Reasoning-based document retrieval — no chunking, no embeddings, no vector DB." +LABEL org.opencontainers.image.source="https://github.com/hallelx2/vectorless-engine" +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.vendor="Vectorless" diff --git a/deploy/allinone/entrypoint.sh b/deploy/allinone/entrypoint.sh new file mode 100644 index 0000000..d5669ee --- /dev/null +++ b/deploy/allinone/entrypoint.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# All-in-one entrypoint: Postgres + Vectorless engine + the local viewer UI, +# all in one container. Postgres is bundled so `docker run` needs no external +# services — the user only supplies an LLM provider key. +set -euo pipefail + +PGUSER_="${POSTGRES_USER:-vectorless}" +PGDB_="${POSTGRES_DB:-vectorless}" + +echo "[vectorless] starting bundled Postgres…" +# The official postgres entrypoint handles first-run initdb (using the +# POSTGRES_* env vars) and then execs postgres. Run it in the background so we +# can start the engine + UI alongside it in the same container. +docker-entrypoint.sh postgres & + +echo "[vectorless] waiting for Postgres to accept connections…" +until pg_isready -h localhost -U "$PGUSER_" -d "$PGDB_" >/dev/null 2>&1; do + sleep 1 +done +echo "[vectorless] Postgres ready." + +# Start the viewer UI (serves the single-page app + same-origin proxy to the +# engine). 
Backgrounded; the engine is the container's main process. +if [ -f /opt/vectorless-app/serve.py ]; then + echo "[vectorless] starting viewer UI on :${VIEWER_PORT:-8080} → ${ENGINE_URL:-http://localhost:7654}" + PYTHONIOENCODING=utf-8 python3 /opt/vectorless-app/serve.py & +fi + +if [ -z "${VLE_LLM_ANTHROPIC_API_KEY:-}" ] && [ -z "${VLE_LLM_OPENAI_API_KEY:-}" ] && [ -z "${VLE_LLM_GEMINI_API_KEY:-}" ]; then + echo "[vectorless] WARNING: no LLM provider key set. Ingestion will work, but" + echo "[vectorless] queries need e.g. -e VLE_LLM_ANTHROPIC_API_KEY=" +fi + +echo "[vectorless] starting engine (local mode) on :7654 …" +# exec so the engine becomes PID 1's foreground process and receives signals. +exec engine --local diff --git a/internal/api/server.go b/internal/api/server.go index c34fc14..f43dc49 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -140,6 +140,7 @@ func Router(d Deps) http.Handler { r.Get("/{id}", d.handleGetDocument) r.Delete("/{id}", d.handleDeleteDocument) r.Get("/{id}/tree", d.handleGetTree) + r.Get("/{id}/source", d.handleGetSource) }) r.Get("/sections/{id}", d.handleGetSection) @@ -364,6 +365,50 @@ func (d Deps) handleDeleteDocument(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNoContent) } +// handleGetSource streams the original uploaded bytes for a document. +// Useful for clients that want to render the source (e.g. a PDF page +// preview in a viewer) without a second storage system. Served inline +// with the document's content type. 
+func (d Deps) handleGetSource(w http.ResponseWriter, r *http.Request) { + id := tree.DocumentID(chi.URLParam(r, "id")) + doc, err := d.DB.GetDocument(r.Context(), id, standaloneOrgID, "") + if err != nil { + if errors.Is(err, db.ErrNotFound) { + writeErr(w, http.StatusNotFound, "document not found") + return + } + writeErr(w, http.StatusInternalServerError, err.Error()) + return + } + if doc.SourceRef == "" { + writeErr(w, http.StatusNotFound, "document has no stored source") + return + } + rc, meta, err := d.Storage.Get(r.Context(), doc.SourceRef) + if err != nil { + if errors.Is(err, storage.ErrNotFound) { + writeErr(w, http.StatusNotFound, "source object not found") + return + } + writeErr(w, http.StatusInternalServerError, err.Error()) + return + } + defer func() { _ = rc.Close() }() + + ct := doc.ContentType + if ct == "" { + ct = "application/octet-stream" + } + w.Header().Set("Content-Type", ct) + if meta.Size > 0 { + w.Header().Set("Content-Length", strconv.FormatInt(meta.Size, 10)) + } + w.Header().Set("Content-Disposition", "inline") + w.Header().Set("Cache-Control", "private, max-age=300") + w.WriteHeader(http.StatusOK) + _, _ = io.Copy(w, rc) +} + func (d Deps) handleGetTree(w http.ResponseWriter, r *http.Request) { id := tree.DocumentID(chi.URLParam(r, "id")) t, err := d.DB.LoadTree(r.Context(), id, standaloneOrgID, "") diff --git a/localapp/README.md b/localapp/README.md new file mode 100644 index 0000000..94bc682 --- /dev/null +++ b/localapp/README.md @@ -0,0 +1,44 @@ +# Vectorless — local viewer + +A tiny, dependency-free local UI for the OSS `vectorless-engine`. Upload a PDF, +watch it ingest into a structured tree, browse the section map, and ask +questions that come back with **cited** answers (page range + verbatim quote) — +answered by whatever model the engine is configured with (here: GLM-4.6 via +z.ai's Anthropic-compatible gateway). + +This is the minimal slice of **HAL-188** (local dashboard). 
It is intentionally +small: a single `index.html` + a stdlib Python proxy. No build step, no Node. + +## Why the proxy +The `engine --local` binary emits **no CORS headers**, so a browser page can't +call `http://localhost:7654` cross-origin. `serve.py` serves the page **and** +reverse-proxies `/engine/*` to the engine, so every request is same-origin. + +## Run + +```bash +# 1. Start the engine (from the vectorless-engine/ repo root), local mode + your GLM key: +cd .. +set -a; . ./.env; set +a # GLM key + base_url (.../api/anthropic/v1) + glm-4.6 +export VLE_INGEST_MODE=minimal +./bin/engine.exe --local # listens on :7654 + +# 2. Start the viewer (from this folder, localapp/): +cd localapp +python serve.py # http://localhost:7655 +``` + +Then open **http://localhost:7655** and: +1. Drop a PDF (e.g. a FinanceBench 10-K) onto **Upload**. +2. Watch it move to **ready** in the **Documents** list; click it. +3. Inspect the **Structure map** (section tree + page ranges). +4. Type a question in **Ask** → get a cited answer with confidence, hops, and cost. + +## Config +- `ENGINE_URL` (default `http://localhost:7654`) — where the engine listens. +- `VIEWER_PORT` (default `7655`) — the viewer's port. + +## Endpoints it uses +`GET /v1/health` · `GET /v1/documents` · `POST /v1/documents` (multipart) · +`GET /v1/documents/{id}` · `GET /v1/documents/{id}/tree` · +`POST /v1/answer/treewalk`. diff --git a/localapp/index.html b/localapp/index.html new file mode 100644 index 0000000..76c278e --- /dev/null +++ b/localapp/index.html @@ -0,0 +1,418 @@ + + + + + +Vectorless · local + + + + + + + + + + +
+ + +
+
+
+
Reasoning-based retrieval
+

Ask a document. Get a cited answer.

+

Upload a PDF — the engine parses it into a structured tree and an LLM navigates that + structure to answer, no chunking, no embeddings, no vectors.

+
+
01
Upload
Drop a PDF in the left rail.
+
02
Ingest
Parsed into a section tree in seconds.
+
03
Ask
Cited answers — page, quote & preview.
+
+
+ + +
+
+
+ + + + diff --git a/localapp/serve.py b/localapp/serve.py new file mode 100644 index 0000000..6eb330a --- /dev/null +++ b/localapp/serve.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Local viewer for the vectorless-engine. + +Serves the single-page viewer (index.html) AND reverse-proxies every +request under /engine/* to the engine on :7654. Same-origin, so the +browser never makes a cross-origin call — no CORS config needed on the +engine (the OSS `engine --local` binary emits no CORS headers). + + python serve.py # viewer on http://localhost:7655, engine assumed on :7654 + VIEWER_PORT=8000 ENGINE_URL=http://localhost:7654 python serve.py + +This is the minimal local-app shell tracked as HAL-188. +""" +import os +import sys +import urllib.request +import urllib.error +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + +HERE = os.path.dirname(os.path.abspath(__file__)) +ENGINE_URL = os.environ.get("ENGINE_URL", "http://localhost:7654").rstrip("/") +PORT = int(os.environ.get("VIEWER_PORT", "7655")) +# Bind host. Default localhost-only for local dev safety; set HOST=0.0.0.0 to +# expose it (the all-in-one Docker image does this so the mapped port works). +HOST = os.environ.get("HOST", "127.0.0.1") + +# Hop-by-hop / host headers we must not forward verbatim. 
+_SKIP_REQ = {"host", "connection", "content-length", "accept-encoding"} +_SKIP_RESP = {"transfer-encoding", "connection", "content-encoding", "content-length"} + + +class Handler(BaseHTTPRequestHandler): + protocol_version = "HTTP/1.1" + + # ---- static viewer ---- + def _serve_index(self): + try: + with open(os.path.join(HERE, "index.html"), "rb") as f: + body = f.read() + except FileNotFoundError: + self.send_error(404, "index.html not found next to serve.py") + return + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body) + + # ---- reverse proxy to the engine ---- + def _proxy(self, method): + target = ENGINE_URL + self.path[len("/engine"):] + length = int(self.headers.get("Content-Length", 0) or 0) + body = self.rfile.read(length) if length else None + + req = urllib.request.Request(target, data=body, method=method) + for k, v in self.headers.items(): + if k.lower() not in _SKIP_REQ: + req.add_header(k, v) + + try: + resp = urllib.request.urlopen(req, timeout=300) + data = resp.read() + status = resp.status + headers = resp.getheaders() + except urllib.error.HTTPError as e: + data = e.read() + status = e.code + headers = list(e.headers.items()) + except urllib.error.URLError as e: + msg = f'{{"error":"cannot reach engine at {ENGINE_URL}: {e.reason}"}}'.encode() + self.send_response(502) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(msg))) + self.end_headers() + self.wfile.write(msg) + return + + self.send_response(status) + sent_ct = False + for k, v in headers: + if k.lower() in _SKIP_RESP: + continue + if k.lower() == "content-type": + sent_ct = True + self.send_header(k, v) + if not sent_ct: + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + 
self.wfile.write(data) + + _CT = {".html": "text/html; charset=utf-8", ".svg": "image/svg+xml", + ".css": "text/css", ".js": "text/javascript", ".ico": "image/x-icon", + ".png": "image/png"} + + def _serve_static(self, path): + rel = path.lstrip("/") or "index.html" + # contain to this directory (prefix + os.sep, so a sibling dir that merely shares the prefix is rejected) — no traversal + full = os.path.normpath(os.path.join(HERE, rel)) + if not full.startswith(HERE + os.sep) or not os.path.isfile(full): + self.send_error(404) + return + with open(full, "rb") as f: + body = f.read() + ext = os.path.splitext(full)[1].lower() + self.send_response(200) + self.send_header("Content-Type", self._CT.get(ext, "application/octet-stream")) + self.send_header("Content-Length", str(len(body))) + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body) + + def do_GET(self): + path = self.path.split("?", 1)[0] + if self.path.startswith("/engine/"): + self._proxy("GET") + else: + self._serve_static(path) + + def do_POST(self): + if self.path.startswith("/engine/"): + self._proxy("POST") + else: + self.send_error(404) + + def do_DELETE(self): + if self.path.startswith("/engine/"): + self._proxy("DELETE") + else: + self.send_error(404) + + def log_message(self, *a): # quiet + pass + + +if __name__ == "__main__": + print(f"Vectorless local viewer -> http://localhost:{PORT} (bind {HOST}:{PORT})") + print(f"Proxying /engine/* -> {ENGINE_URL}") + try: + ThreadingHTTPServer((HOST, PORT), Handler).serve_forever() + except KeyboardInterrupt: + sys.exit(0) From e0992e1eb81c6c8a4acae9ba23ccc0bbe1646195 Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 01:24:31 +0100 Subject: [PATCH 6/9] ci: enforce LF for shell scripts (protect the Docker entrypoint) --- .gitattributes | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7da8c73 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +# Shell scripts and the all-in-one entrypoint 
MUST stay LF — CRLF breaks the +# shebang inside the Linux container ("bad interpreter: /usr/bin/env bash^M"). +*.sh text eol=lf +deploy/allinone/entrypoint.sh text eol=lf +localapp/serve.py text eol=lf From ea11a064bbf91160fa627d4060f6c3427049123c Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 10:39:48 +0100 Subject: [PATCH 7/9] ci: build all-in-one image from the local-mode branch to publish :latest --- .github/workflows/docker-allinone.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml index eb6818c..61fb2ed 100644 --- a/.github/workflows/docker-allinone.yml +++ b/.github/workflows/docker-allinone.yml @@ -13,8 +13,12 @@ name: docker-allinone # (GHCR uses the built-in GITHUB_TOKEN — no extra secret.) on: - workflow_dispatch: {} # run on demand from the Actions tab / gh CLI + workflow_dispatch: {} # run on demand once this lands on the default branch push: + # TEMPORARY: build from the local-mode feature branch so we can publish a + # usable :latest before this merges to main. After merge, drop the branch + # entry and rely on workflow_dispatch + tags (latest then tracks default). 
+ branches: ["halleluyaholudele/hal-186-engine-zero-config-local-mode"] tags: ["v*.*.*"] permissions: @@ -53,8 +57,7 @@ jobs: docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless ghcr.io/${{ github.repository_owner }}/vectorless tags: | - type=raw,value=latest,enable={{is_default_branch}} - type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} + type=raw,value=latest type=ref,event=tag type=sha,prefix=sha-,format=short From ccaa5c9244d9459f909541ac5fc54a7e97341026 Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 12:03:43 +0100 Subject: [PATCH 8/9] =?UTF-8?q?engine+ui:=20BYOK=20=E2=80=94=20per-request?= =?UTF-8?q?=20LLM=20key=20via=20X-LLM-*=20headers=20+=20dashboard=20settin?= =?UTF-8?q?gs=20(HAL-188)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The engine now boots without a provider key in local mode and accepts per-request credentials (X-LLM-Api-Key / X-LLM-Provider / X-LLM-Base-Url / X-LLM-Model), building a per-request client that drives both the treewalk loop and citation span extraction. The bundled UI gains a settings modal that stores the key in the browser and sends it as headers — so a docker-run user configures their key from the dashboard, not only via env. 
--- cmd/engine/main.go | 80 ++++++++++++++++++++++++++++++++++- internal/api/server.go | 7 ++++ internal/api/treewalk.go | 52 +++++++++++++++++++++-- localapp/index.html | 91 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 222 insertions(+), 8 deletions(-) diff --git a/cmd/engine/main.go b/cmd/engine/main.go index bd447b0..a379c98 100644 --- a/cmd/engine/main.go +++ b/cmd/engine/main.go @@ -103,7 +103,16 @@ func run() error { llmClient, err := buildLLM(cfg.LLM) if err != nil { - return fmt.Errorf("init llm: %w", err) + // A missing provider key is non-fatal in local mode: the bundled UI + // (and any caller) can supply credentials per request via X-LLM-* + // headers (BYOK), so boot without a shared client and let those + // requests build their own. Any other init error is still fatal. + if config.LocalModeEnabled() && llmKeyMissing(cfg.LLM) { + logger.Warn("no LLM provider key configured — queries require a per-request key (BYOK via X-LLM-Api-Key), or set VLE_LLM_ANTHROPIC_API_KEY") + llmClient = nil + } else { + return fmt.Errorf("init llm: %w", err) + } } strategy := buildStrategy(cfg.Retrieval, llmClient, store) @@ -221,12 +230,16 @@ func run() error { // (gated by retrieval.treewalk.enabled), even on a deployment // using chunked-tree as its default selection path. var treeWalkStrategy *retrieval.TreeWalkStrategy - if cfg.Retrieval.TreeWalk.Enabled && llmClient != nil { + if cfg.Retrieval.TreeWalk.Enabled { + // Built even when llmClient is nil (no server key): the per-request + // BYOK path sets the strategy's client from X-LLM-Api-Key headers, + // so the endpoint stays available for callers that bring their own key. 
treeWalkStrategy = buildTreeWalkStrategy(cfg.Retrieval, llmClient, store) logger.Info("retrieval: treewalk answer endpoint enabled", "max_hops", treeWalkStrategy.MaxHops, "page_content_limit", treeWalkStrategy.PageContentLimit, "model_override", cfg.Retrieval.TreeWalk.Model, + "server_key", llmClient != nil, ) } @@ -250,6 +263,9 @@ func run() error { Abstain: cfg.Retrieval.Abstain, TreeWalkStrategy: treeWalkStrategy, TreeWalk: cfg.Retrieval.TreeWalk, + BuildLLM: func(provider, apiKey, baseURL, model string) (llmgate.Client, error) { + return buildLLMFrom(cfg.LLM, provider, apiKey, baseURL, model) + }, } srv := &http.Server{ @@ -389,6 +405,66 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) { } } +// llmKeyMissing reports whether the configured provider has no API key. +// Used to keep local-mode boot non-fatal so per-request BYOK can work. 
+func llmKeyMissing(c config.LLMConfig) bool { + switch c.Driver { + case "anthropic": + return c.Anthropic.APIKey == "" + case "openai": + return c.OpenAI.APIKey == "" + case "gemini": + return c.Gemini.APIKey == "" + } + return false +} + +// buildLLMFrom constructs an llmgate client from caller-supplied +// credentials (BYOK) for the per-request X-LLM-* headers, so a UI user can +// paste only an API key. An empty provider falls back to c.Driver and an +// empty model to that provider's configured model. baseURL (with its config +// fallback) is honored only for "anthropic"; "openai" and "gemini" ignore it. +func buildLLMFrom(c config.LLMConfig, provider, apiKey, baseURL, model string) (llmgate.Client, error) { + if provider == "" { + provider = c.Driver + } + switch provider { + case "anthropic": + if model == "" { + model = c.Anthropic.Model + } + if baseURL == "" { + baseURL = c.Anthropic.BaseURL + } + return anthropic.New(anthropic.Config{ + APIKey: apiKey, + Model: model, + ReasoningModel: c.Anthropic.ReasoningModel, + BaseURL: baseURL, + }) + case "openai": + if model == "" { + model = c.OpenAI.Model + } + return openai.New(openai.Config{ + APIKey: apiKey, + Model: model, + ReasoningModel: c.OpenAI.ReasoningModel, + }) + case "gemini": + if model == "" { + model = c.Gemini.Model + } + return gemini.New(gemini.Config{ + APIKey: apiKey, + Model: model, + ReasoningModel: c.Gemini.ReasoningModel, + }) + default: + return nil, fmt.Errorf("unknown llm provider: %s", provider) + } +} + func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy { switch c.Strategy { case "single-pass": diff --git a/internal/api/server.go b/internal/api/server.go index f43dc49..d9eadf2 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -59,6 +59,13 @@ type Deps struct { // LLMModel is the default model name. Per-request overrides win. LLMModel string + // BuildLLM constructs a per-request llmgate client from caller-supplied + // credentials (BYOK), inheriting server defaults for any empty field. + // Wired in main.go. When set, callers can pass their own key/base_url/ + // model via X-LLM-* request headers; nil disables per-request keys and + // handlers fall back to the shared LLM client. See resolveLLM. 
+ BuildLLM func(provider, apiKey, baseURL, model string) (llmgate.Client, error) + // AnswerSpan / Answer hold the relevant config blocks. Default // values (AnswerSpan disabled, Answer.MaxSections=5) are safe. AnswerSpan config.AnswerSpanBlock diff --git a/internal/api/treewalk.go b/internal/api/treewalk.go index 6bd895a..9337db2 100644 --- a/internal/api/treewalk.go +++ b/internal/api/treewalk.go @@ -78,8 +78,32 @@ type treeWalkAnswerRequest struct { // "stream"?: false, "reasoning"?: false } // // Response: see treeWalkAnswerResponse below. +// resolveLLM picks the llmgate client for this request. When the caller +// supplies BYOK credentials via the X-LLM-Api-Key header (optionally +// X-LLM-Provider / X-LLM-Base-Url / X-LLM-Model) and a BuildLLM factory is +// wired, it builds a per-request client inheriting server defaults for any +// empty field; otherwise it returns the shared client. The returned model +// string is the header-supplied model override (may be ""). +func (d Deps) resolveLLM(r *http.Request) (llmgate.Client, string, error) { + model := r.Header.Get("X-LLM-Model") + key := r.Header.Get("X-LLM-Api-Key") + if key == "" || d.BuildLLM == nil { + return d.LLM, model, nil + } + c, err := d.BuildLLM( + r.Header.Get("X-LLM-Provider"), + key, + r.Header.Get("X-LLM-Base-Url"), + model, + ) + if err != nil { + return nil, model, err + } + return c, model, nil +} + func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { - if d.LLM == nil { + if d.LLM == nil && d.BuildLLM == nil { writeErr(w, http.StatusNotImplemented, "answer/treewalk endpoint requires an LLM client") return } @@ -126,9 +150,29 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { if body.MaxPagesPerFetch > 0 { perReq.PageContentLimit = body.MaxPagesPerFetch } + // BYOK: if the caller supplies their own LLM credentials via X-LLM-* + // headers, build a per-request client and run BOTH the navigation loop + // and citation span-extraction 
through it. dReq is a value copy of Deps + // with the per-request client swapped in — Deps is passed by value so + // this never mutates the shared instance other goroutines read. + client, hdrModel, err := d.resolveLLM(r) + if err != nil { + writeErr(w, http.StatusBadRequest, "invalid LLM credentials: "+err.Error()) + return + } + if client == nil { + writeErr(w, http.StatusBadRequest, "no LLM credentials: configure a server key or send an X-LLM-Api-Key header (BYOK)") + return + } + perReq.LLM = client + dReq := d + dReq.LLM = client + if body.Model == "" { + body.Model = hdrModel + } + // Per-request model override falls through to budget.ModelName // the same way every other handler does. - budget := retrieval.ContextBudget{ModelName: body.Model} if budget.ModelName == "" { budget.ModelName = d.LLMModel @@ -139,7 +183,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { // Stream variant: hijack the response writer for SSE and emit // one event per tool call. 
if body.Stream { - d.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started) + dReq.serveAnswerTreeWalkStream(w, r, &perReq, t, body, budget, started) return } @@ -164,7 +208,7 @@ func (d Deps) handleAnswerTreeWalk(w http.ResponseWriter, r *http.Request) { return } - citations := d.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model) + citations := dReq.buildTreeWalkCitations(r.Context(), t, res, body.Query, body.Model) resp := map[string]any{ "document_id": body.DocumentID, diff --git a/localapp/index.html b/localapp/index.html index 76c278e..dc4197c 100644 --- a/localapp/index.html +++ b/localapp/index.html @@ -31,6 +31,26 @@ .status{margin-left:auto;display:flex;align-items:center;gap:7px;font-size:12px;color:var(--muted)} .dot{width:8px;height:8px;border-radius:50%;background:var(--muted);transition:background .3s} .dot.up{background:var(--ok)} .dot.down{background:var(--err)} + .navbtn{display:flex;align-items:center;gap:5px;background:none;border:1px solid var(--hairline);border-radius:8px; + padding:5px 8px;cursor:pointer;color:var(--secondary);transition:border-color .12s,color .12s} + .navbtn:hover{border-color:var(--blue);color:var(--blue)} + .keydot{width:7px;height:7px;border-radius:50%;background:var(--err)} + .keydot.set{background:var(--ok)} + /* modal */ + .modal-bg{position:fixed;inset:0;background:rgba(10,10,10,.35);display:none;align-items:center;justify-content:center;z-index:50} + .modal-bg.open{display:flex} + .modal{background:var(--panel);border:1px solid var(--hairline);border-radius:16px;width:440px;max-width:92vw; + box-shadow:0 20px 60px rgba(0,0,0,.18);overflow:hidden} + .modal .mhead{padding:18px 22px 14px;border-bottom:1px solid var(--hairline2)} + .modal .mhead h3{margin:0;font-size:15px;font-weight:500} .modal .mhead p{margin:5px 0 0;font-size:12.5px;color:var(--muted)} + .modal .mbody{padding:18px 22px;display:flex;flex-direction:column;gap:14px} + .field 
label{display:block;font-size:12px;font-weight:500;color:var(--secondary);margin-bottom:5px} + .field input,.field select{width:100%;font-family:inherit;font-size:13.5px;color:var(--ink);background:var(--canvas); + border:1px solid var(--hairline);border-radius:9px;padding:9px 11px} + .field input:focus,.field select:focus{outline:none;border-color:var(--blue);box-shadow:0 0 0 3px rgba(20,86,240,.08)} + .field .fhint{font-size:11px;color:var(--muted);margin-top:4px} + .modal .mfoot{padding:14px 22px;border-top:1px solid var(--hairline2);display:flex;gap:10px;justify-content:flex-end;align-items:center} + .modal .mfoot .spacer{margin-right:auto;font-size:12px} .shell{display:grid;grid-template-columns:300px 1fr;flex:1 1 auto;min-height:0} @media(max-width:860px){.shell{grid-template-columns:1fr}} @@ -144,6 +164,10 @@ Vectorless local engine · glm-4.6 · treewalk + checking… @@ -198,8 +222,69 @@

Ask a document. Get a cited answer.

+ + From 079d2aabff02ef5101ce61dd25ebee70cfe34e12 Mon Sep 17 00:00:00 2001 From: hallelx2 Date: Fri, 19 Jun 2026 12:28:35 +0100 Subject: [PATCH 9/9] ci: trigger all-in-one image build on main + dispatch (post-merge) --- .github/workflows/docker-allinone.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-allinone.yml b/.github/workflows/docker-allinone.yml index 61fb2ed..29c3839 100644 --- a/.github/workflows/docker-allinone.yml +++ b/.github/workflows/docker-allinone.yml @@ -13,12 +13,9 @@ name: docker-allinone # (GHCR uses the built-in GITHUB_TOKEN — no extra secret.) on: - workflow_dispatch: {} # run on demand once this lands on the default branch + workflow_dispatch: {} # run on demand from the Actions tab / gh CLI push: - # TEMPORARY: build from the local-mode feature branch so we can publish a - # usable :latest before this merges to main. After merge, drop the branch - # entry and rely on workflow_dispatch + tags (latest then tracks default). - branches: ["halleluyaholudele/hal-186-engine-zero-config-local-mode"] + branches: [main] # publish :latest on every push to the default branch tags: ["v*.*.*"] permissions: @@ -57,7 +54,8 @@ jobs: docker.io/${{ secrets.DOCKERHUB_USERNAME }}/vectorless ghcr.io/${{ github.repository_owner }}/vectorless tags: | - type=raw,value=latest + type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} type=ref,event=tag type=sha,prefix=sha-,format=short