diff --git a/internal/cli/connection/core/sync/state.go b/internal/cli/connection/core/sync/state.go index f7ab4de97..36095e72f 100644 --- a/internal/cli/connection/core/sync/state.go +++ b/internal/cli/connection/core/sync/state.go @@ -41,18 +41,20 @@ func loadState() (state, func(), error) { return s, nil, mkErr } - // Acquire lock: fail if another sync is running. - if _, statErr := os.Stat(lockPath); statErr == nil { - return s, nil, os.ErrExist + // Acquire lock: fail if another sync is running. The + // O_CREATE|O_EXCL create-or-fail is a single syscall, so + // there is no check-to-write window for a concurrent sync + // to slip through. + acquired, lockErr := io.SafeTryLock(lockPath, fs.PermFile) + if lockErr != nil { + return s, nil, lockErr } - if writeErr := io.SafeWriteFile( - lockPath, []byte(cfgHub.LockSentinel), fs.PermFile, - ); writeErr != nil { - return s, nil, writeErr + if !acquired { + return s, nil, os.ErrExist } release := func() { - if rmErr := os.Remove(lockPath); rmErr != nil { + if rmErr := io.SafeUnlock(lockPath); rmErr != nil { logWarn.Warn(cfgWarn.Remove, lockPath, rmErr) } } diff --git a/internal/cli/connection/core/sync/state_test.go b/internal/cli/connection/core/sync/state_test.go new file mode 100644 index 000000000..dedafe601 --- /dev/null +++ b/internal/cli/connection/core/sync/state_test.go @@ -0,0 +1,153 @@ +// / ctx: https://ctx.ist +// ,'`./ do you remember? +// `.,'\ +// \ Copyright 2026-present Context contributors. +// SPDX-License-Identifier: Apache-2.0 + +package sync + +import ( + "errors" + "os" + "path/filepath" + "testing" + + "github.com/ActiveMemory/ctx/internal/config/fs" + cfgHub "github.com/ActiveMemory/ctx/internal/config/hub" + "github.com/ActiveMemory/ctx/internal/io" + "github.com/ActiveMemory/ctx/internal/testutil/testctx" +) + +// declareContext positions the test in a temp project with a +// materialized .context/ directory and returns its path. +func declareContext(t *testing.T) string { + t.Helper() + dir := t.TempDir() + ctxDir := testctx.Declare(t, dir) + if mkErr := os.MkdirAll(ctxDir, fs.PermExec); mkErr != nil { + t.Fatal(mkErr) + } + return ctxDir +} + +func TestLoadState_RejectsConcurrentSyncs(t *testing.T) { + declareContext(t) + + const attempts = 16 + + type outcome struct { + release func() + err error + } + + start := make(chan struct{}) + results := make(chan outcome, attempts) + for i := 0; i < attempts; i++ { + go func() { + <-start + _, release, lockErr := loadState() + results <- outcome{release: release, err: lockErr} + }() + } + close(start) + + // Winners must not release until every attempt has + // finished, or a late goroutine could legitimately + // re-acquire the freed lock and skew the count. + var winners []func() + var contended int + for i := 0; i < attempts; i++ { + got := <-results + switch { + case got.err == nil: + winners = append(winners, got.release) + case errors.Is(got.err, os.ErrExist): + contended++ + default: + t.Fatalf("unexpected error: %v", got.err) + } + } + + if len(winners) != 1 { + t.Errorf("winners = %d, want exactly 1", len(winners)) + } + if contended != attempts-1 { + t.Errorf( + "contended = %d, want %d", contended, attempts-1, + ) + } + for _, release := range winners { + release() + } +} + +func TestLoadState_ReleaseRemovesLock(t *testing.T) { + ctxDir := declareContext(t) + lockPath := filepath.Join( + ctxDir, cfgHub.DirHub, cfgHub.FileSyncLock, + ) + + _, release, lockErr := loadState() + if lockErr != nil { + t.Fatalf("first loadState: %v", lockErr) + } + if _, statErr := os.Stat(lockPath); statErr != nil { + t.Fatalf("lock file should exist while held: %v", statErr) + } + + release() + + if _, statErr := os.Stat(lockPath); !os.IsNotExist(statErr) { + t.Errorf( + "lock file should be gone after release: %v", statErr, + ) + } + + // The next sync must be able to proceed. + _, release, lockErr = loadState() + if lockErr != nil { + t.Fatalf("loadState after release: %v", lockErr) + } + release() +} + +func TestLoadState_ReleasesLockOnCorruptState(t *testing.T) { + ctxDir := declareContext(t) + hubDir := filepath.Join(ctxDir, cfgHub.DirHub) + if mkErr := io.SafeMkdirAll(hubDir, fs.PermKeyDir); mkErr != nil { + t.Fatal(mkErr) + } + statePath := filepath.Join(hubDir, cfgHub.FileSyncState) + if writeErr := os.WriteFile( + statePath, []byte("{not json"), fs.PermFile, + ); writeErr != nil { + t.Fatal(writeErr) + } + + _, _, loadErr := loadState() + if loadErr == nil { + t.Fatal("loadState should fail on corrupt state") + } + + lockPath := filepath.Join(hubDir, cfgHub.FileSyncLock) + if _, statErr := os.Stat(lockPath); !os.IsNotExist(statErr) { + t.Errorf( + "lock must not leak after a failed load: %v", statErr, + ) + } +} + +func TestLoadState_LockFileLocation(t *testing.T) { + ctxDir := declareContext(t) + + _, release, lockErr := loadState() + if lockErr != nil { + t.Fatalf("loadState: %v", lockErr) + } + defer release() + + want := filepath.Join(ctxDir, cfgHub.DirHub, cfgHub.FileSyncLock) + if _, statErr := os.Stat(want); statErr != nil { + t.Errorf("lock file not at %s: %v", want, statErr) + } +} diff --git a/internal/config/hub/doc.go b/internal/config/hub/doc.go index 0eb87a683..d22c657ab 100644 --- a/internal/config/hub/doc.go +++ b/internal/config/hub/doc.go @@ -59,7 +59,7 @@ // connection config files // - FilePID ("hub.pid"), FileAdminToken, // DirHubData: daemon management files -// - JSONIndent, LockSentinel, SuffixPluralMD: +// - JSONIndent, SuffixPluralMD: // formatting and naming helpers // // # Raft Cluster Configuration diff --git a/internal/config/hub/hub.go b/internal/config/hub/hub.go index 0d8fe4e70..16e7cc15d 100644 --- a/internal/config/hub/hub.go +++ b/internal/config/hub/hub.go @@ -167,8 +167,6 @@ const ( FileConnect = ".connect.enc" // JSONIndent is the indentation string for JSON marshaling. JSONIndent = " " - // LockSentinel is the content written to lock files. - LockSentinel = "lock" // SuffixPluralMD is the suffix for typed hub markdown // filenames (e.g. "decisions.md"). SuffixPluralMD = "s.md" diff --git a/specs/fix-connect-sync-lock-toctou.md b/specs/fix-connect-sync-lock-toctou.md new file mode 100644 index 000000000..021f9edd5 --- /dev/null +++ b/specs/fix-connect-sync-lock-toctou.md @@ -0,0 +1,87 @@ +# Fix Connect Sync Lock TOCTOU Race + +`internal/cli/connection/core/sync.loadState` guarded +`ctx connect sync` against concurrent execution with a +check-then-write pattern (`os.Stat` followed by +`io.SafeWriteFile`), not a real lock. Upstream issue: +[ActiveMemory/ctx#93](https://github.com/ActiveMemory/ctx/issues/93). + +## Problem + +The window between the existence check and the write is +unbounded, and the pattern is racy by construction: + +```go +// Acquire lock: fail if another sync is running. +if _, statErr := os.Stat(lockPath); statErr == nil { + return s, nil, os.ErrExist +} +if writeErr := io.SafeWriteFile( + lockPath, []byte(cfgHub.LockSentinel), fs.PermFile, +); writeErr != nil { + return s, nil, writeErr +} +``` + +Two processes racing past the `os.Stat` (hook-triggered plus +manual invocation; cron plus interactive run; two terminals) +can both decide the lock is free, both write the lock file, +both load the same `LastSequence`, both fetch the same entries +from the hub, and both write duplicate content into +`.context/hub/`. (Issue #93 says `.context/shared/`; that is +the pre-rename path — the renderer joins `cfgHub.DirHub` at +`internal/cli/connection/core/render/render.go:32`.) + +The doc comment ("Acquires a lock file to prevent concurrent +access.") overstated what the code did. + +The package had no test coverage at all: no test exercised +the contention path, the release path, or the lock location. + +## Solution + +Replace the stat-then-write with the atomic create-or-fail +primitive that already exists for exactly this purpose: +`io.SafeTryLock` (`O_CREATE|O_EXCL` in a single syscall) and +its counterpart `io.SafeUnlock`. Both landed with the dream +consolidator and have prior art at +`internal/cli/dream/core/pass/pass.go:62-70`. + +1. `internal/cli/connection/core/sync/state.go` — swap the + `os.Stat` / `io.SafeWriteFile` pair for one + `io.SafeTryLock` call. `acquired == false` maps to the + existing `os.ErrExist` contract so `Run`'s caller-facing + behavior is unchanged. The `release` closure delegates to + `io.SafeUnlock`, keeping the warn-on-failure logging. +2. `internal/config/hub/hub.go` — delete `LockSentinel`. Its + only consumer was the racy write; `SafeTryLock` creates an + empty lock file (the lock is the file's existence, not its + content), so the constant is dead. Leaving it would be a + broken window. +3. `internal/config/hub/doc.go` — drop `LockSentinel` from + the package-doc constant inventory. +4. `internal/cli/connection/core/sync/state_test.go` — new; + regression-pins the contract: + - `TestLoadState_RejectsConcurrentSyncs`: N goroutines + race `loadState`; exactly one acquires, the rest observe + `os.ErrExist`. + - `TestLoadState_ReleaseRemovesLock`: after `release()`, + the lock file is gone and a subsequent `loadState` + succeeds. + - `TestLoadState_ReleasesLockOnCorruptState`: a corrupt + `.sync-state.json` makes `loadState` fail *after* + acquisition; the lock must not leak. + - `TestLoadState_LockFileLocation`: the lock lives at + `/hub/.sync.lock` (pins `DirHub` + + `FileSyncLock` composition). + +## Out of Scope + +- Stale-lock detection (PID-in-lockfile, age-based cleanup). + A crashed process still leaves a stale lock; the issue + explicitly defers this to a follow-up. +- `flock`-based locking (issue's Option B). Rejected for now: + `syscall.Flock` is Unix-only and `SafeTryLock` already + matches the existing lockfile-as-sentinel model. +- The hubsync hook's silent error suppression + (ActiveMemory/ctx#100) — adjacent code, separate issue.