diff --git a/.claude/commands/stress-test-sync-sqlitecloud.md b/.claude/commands/stress-test-sync-sqlitecloud.md
index f7f1c757..c4f98f82 100644
--- a/.claude/commands/stress-test-sync-sqlitecloud.md
+++ b/.claude/commands/stress-test-sync-sqlitecloud.md
@@ -114,13 +114,13 @@ Create a bash script at `/tmp/stress_test_concurrent.sh` that:
- Each iteration does:
a. **UPDATE** — run `UPDATE
SET value = value + 1;` repeated `NUM_UPDATES` times (skip if 0)
b. **DELETE** — run `DELETE FROM WHERE rowid IN (SELECT rowid FROM ORDER BY RANDOM() LIMIT 10);` repeated `NUM_DELETES` times (skip if 0)
- c. **Sync using the 3-step send/check/check pattern:**
+ c. **Sync using the 3-step send/receive/receive pattern:**
1. `SELECT cloudsync_network_send_changes();` — send local changes to the server
- 2. `SELECT cloudsync_network_check_changes();` — ask the server to prepare a payload of remote changes
+ 2. `SELECT cloudsync_network_receive_changes();` — ask the server to prepare a payload of remote changes
3. Sleep 1 second (outside sqlite3, between two separate sqlite3 invocations)
- 4. `SELECT cloudsync_network_check_changes();` — download the prepared payload, if any
+ 4. `SELECT cloudsync_network_receive_changes();` — download the prepared payload, if any
- Each sqlite3 session must: `.load` the extension, call `cloudsync_network_init()`/`cloudsync_network_init_custom()`, `cloudsync_network_set_apikey()`/`cloudsync_network_set_token()` (depending on RLS mode), do the work, call `cloudsync_terminate()`
- - **Timing**: Log the wall-clock execution time (in milliseconds) for each `cloudsync_network_send_changes()`, `cloudsync_network_check_changes()` call. Define a `now_ms()` helper function at the top of the script and use it before and after each sqlite3 invocation that calls a network function, computing the delta. On **macOS**, `date` does not support `%3N` (nanoseconds) — use `python3 -c 'import time; print(int(time.time()*1000))'` instead. On **Linux**, `date +%s%3N` works fine. The script should detect the platform and define `now_ms()` accordingly. Log lines like: `[DB][iter ] send_changes: 123ms`, `[DB][iter ] check_changes_1: 45ms`, `[DB][iter ] check_changes_2: 67ms`
+ - **Timing**: Log the wall-clock execution time (in milliseconds) for each `cloudsync_network_send_changes()` and `cloudsync_network_receive_changes()` call. Define a `now_ms()` helper function at the top of the script and use it before and after each sqlite3 invocation that calls a network function, computing the delta. On **macOS**, `date` does not support `%3N` (the GNU `%N` extension truncated to milliseconds) — use `python3 -c 'import time; print(int(time.time()*1000))'` instead. On **Linux**, `date +%s%3N` works fine. The script should detect the platform and define `now_ms()` accordingly. Log lines like: `[DB][iter ] send_changes: 123ms`, `[DB][iter ] receive_changes_1: 45ms`, `[DB][iter ] receive_changes_2: 67ms`
- Include labeled output lines like `[DB][iter ] updated count=, deleted count=` for grep-ability
3. **Launches all workers in parallel** using `&` and collects PIDs
@@ -138,7 +138,7 @@ Create a bash script at `/tmp/stress_test_concurrent.sh` that:
- Use `echo -e` to pipe generated SQL (with `\n` separators) into sqlite3
- During database initialization (Step 1), insert `ROWS` initial rows per database in a single transaction so each DB starts with data to update/delete. Row IDs should be unique across databases: `db_r`
- User IDs for rows must match the token's userId for RLS to work
-- The sync pattern requires **separate sqlite3 invocations** for send_changes and each check_changes call (with a 1-second sleep between the two check_changes calls), so that timing can be measured per-call from bash
+- The sync pattern requires **separate sqlite3 invocations** for send_changes and each receive_changes call (with a 1-second sleep between the two receive_changes calls), so that timing can be measured per-call from bash
- **stderr capture**: All sqlite3 invocations must redirect both stdout and stderr to the log file. Use `>> "$LOG" 2>&1` (in this order — stdout redirect first, then stderr to stdout). For timed calls that capture output in a variable, redirect stderr to the log file separately: `RESULT=$(echo -e "$SQL" | $SQLITE3 "$DB" 2>> "$LOG")` and then echo `$RESULT` to the log as well. This ensures "Runtime error" messages from sqlite3 are never lost.
- Use `/bin/bash` (not `/bin/sh`) for arrays and process management
@@ -191,7 +191,7 @@ Report the test results including:
| Rows per iteration | ROWS |
| Iterations per database | ITERATIONS |
| Total CRUD operations | N × ITERATIONS × (UPDATE_ALL + DELETE_FEW) |
-| Total sync operations | N × ITERATIONS × 3 (1 send_changes + 2 check_changes) |
+| Total sync operations | N × ITERATIONS × 3 (1 send_changes + 2 receive_changes) |
| Duration | start to finish time |
| Total errors | count |
| Error types | categorized list |
diff --git a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md
index c23b43c8..a7edae24 100644
--- a/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md
+++ b/.claude/commands/test-sync-roundtrip-sqlitecloud-rls.md
@@ -267,8 +267,8 @@ For each of the four SQLite databases, execute the sync operations:
SELECT cloudsync_network_send_changes();
-- Check for changes from server (repeat with 2-3 second delays)
-SELECT cloudsync_network_check_changes();
--- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
+SELECT cloudsync_network_receive_changes();
+-- Repeat receive_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
```
**Recommended sync order:**
@@ -276,7 +276,7 @@ SELECT cloudsync_network_check_changes();
2. Sync Database 2A (send + check)
3. Sync Database 1B (send + check)
4. Sync Database 2B (send + check)
-5. Re-sync all databases (check_changes) to ensure full propagation
+5. Re-sync all databases (receive_changes) to ensure full propagation
### Step 10: Verify RLS Enforcement
@@ -333,7 +333,7 @@ SELECT COUNT(*) FROM WHERE id = 'malicious_1';
**Also verify the malicious row does NOT appear in User 2's databases after syncing:**
```sql
-- In Database 2A or 2B (User 2)
-SELECT cloudsync_network_check_changes();
+SELECT cloudsync_network_receive_changes();
SELECT * FROM WHERE id = 'malicious_1';
-- Expected: 0 rows (the malicious row should not sync to legitimate User 2 databases)
```
@@ -405,7 +405,7 @@ The test FAILS if:
- Always use the Homebrew sqlite3 binary, NOT `/usr/bin/sqlite3`
- The cloudsync extension must be built first with `make`
- SQLiteCloud tables need cleanup before re-running tests
-- `cloudsync_network_check_changes()` may need multiple calls with delays
+- `cloudsync_network_receive_changes()` may need multiple calls with delays
- Run `SELECT cloudsync_terminate();` on SQLite connections before closing to properly cleanup memory
- Ensure both test users exist in Supabase auth before running the test
- The RLS policies must use `auth_userid()` to work with SQLiteCloud token authentication
diff --git a/.claude/commands/test-sync-roundtrip-supabase-rls.md b/.claude/commands/test-sync-roundtrip-supabase-rls.md
index ab40d011..d37f8e00 100644
--- a/.claude/commands/test-sync-roundtrip-supabase-rls.md
+++ b/.claude/commands/test-sync-roundtrip-supabase-rls.md
@@ -275,8 +275,8 @@ For each of the four SQLite databases, execute the sync operations:
SELECT cloudsync_network_send_changes();
-- Check for changes from server (repeat with 2-3 second delays)
-SELECT cloudsync_network_check_changes();
--- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
+SELECT cloudsync_network_receive_changes();
+-- Repeat receive_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
```
**Recommended sync order:**
@@ -284,7 +284,7 @@ SELECT cloudsync_network_check_changes();
2. Sync Database 2A (send + check)
3. Sync Database 1B (send + check)
4. Sync Database 2B (send + check)
-5. Re-sync all databases (check_changes) to ensure full propagation
+5. Re-sync all databases (receive_changes) to ensure full propagation
### Step 10: Verify RLS Enforcement
@@ -341,7 +341,7 @@ SELECT COUNT(*) FROM WHERE id = 'malicious_1';
**Also verify the malicious row does NOT appear in User 2's databases after syncing:**
```sql
-- In Database 2A or 2B (User 2)
-SELECT cloudsync_network_check_changes();
+SELECT cloudsync_network_receive_changes();
SELECT * FROM WHERE id = 'malicious_1';
-- Expected: 0 rows (the malicious row should not sync to legitimate User 2 databases)
```
@@ -414,7 +414,7 @@ The test FAILS if:
- Always use the Homebrew sqlite3 binary, NOT `/usr/bin/sqlite3`
- The cloudsync extension must be built first with `make`
- PostgreSQL tables need cleanup before re-running tests
-- `cloudsync_network_check_changes()` may need multiple calls with delays
+- `cloudsync_network_receive_changes()` may need multiple calls with delays
- Run `SELECT cloudsync_terminate();` on SQLite connections before closing to properly cleanup memory
- Ensure both test users exist in Supabase auth before running the test
- The RLS policies must use `auth.uid()` to work with Supabase JWT authentication
diff --git a/.claude/commands/test-sync-roundtrip-supabase.md b/.claude/commands/test-sync-roundtrip-supabase.md
index 091986ff..99e7c7f8 100644
--- a/.claude/commands/test-sync-roundtrip-supabase.md
+++ b/.claude/commands/test-sync-roundtrip-supabase.md
@@ -136,8 +136,8 @@ In the SQLite session:
SELECT cloudsync_network_send_changes();
-- Check for changes from server (repeat with 2-3 second delays)
-SELECT cloudsync_network_check_changes();
--- Repeat check_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
+SELECT cloudsync_network_receive_changes();
+-- Repeat receive_changes 3-5 times with delays until it returns more than 0 received rows or stabilizes
-- Verify final data
SELECT * FROM ;
@@ -164,7 +164,7 @@ Report the test results including:
- Always use the Homebrew sqlite3 binary, NOT `/usr/bin/sqlite3`
- The cloudsync extension must be built first with `make`
- PostgreSQL tables need cleanup before re-running tests
-- `cloudsync_network_check_changes()` may need multiple calls with delays
+- `cloudsync_network_receive_changes()` may need multiple calls with delays
- Run `SELECT cloudsync_terminate();` on SQLite connections before closing to properly clean up memory
## Permissions
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5a88eb84..21aa7dad 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,6 +12,13 @@ permissions:
pages: write
id-token: write
+# Cancel an in-progress run when the same branch is pushed again. This also keeps two
+# runs of the same branch from hitting the shared integration tenants concurrently
+# (the chunked negative-cache test needs an idle tenant — see the chunked env below).
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
build:
if: ${{ !contains(github.event.head_commit.message, '[auto-update]') }}
@@ -89,6 +96,12 @@ jobs:
INTEGRATION_TEST_CLOUDSYNC_ADDRESS: ${{ secrets.INTEGRATION_TEST_CLOUDSYNC_ADDRESS }}
INTEGRATION_TEST_OFFLINE_DATABASE_ID: ${{ secrets.INTEGRATION_TEST_OFFLINE_DATABASE_ID }}
INTEGRATION_TEST_FAILURE_DATABASE_ID: ${{ secrets.INTEGRATION_TEST_FAILURE_DATABASE_ID }}
+ # Chunked tests need an EXCLUSIVE tenant (the negative-cache test's Phase 2 asserts
+ # an idle tenant, so concurrent writers from other matrix legs cause false failures).
+ # Run them on a single leg only; every other leg gets '' and skips them (treated as
+ # unset by test_chunked_pair_open). The chunked network path is platform-agnostic C,
+ # so per-OS coverage adds little here.
+ INTEGRATION_TEST_CHUNKED_DATABASE_ID: ${{ (matrix.name == 'linux' && matrix.arch == 'x86_64') && secrets.INTEGRATION_TEST_CHUNKED_DATABASE_ID || '' }}
steps:
@@ -137,6 +150,7 @@ jobs:
-e INTEGRATION_TEST_CLOUDSYNC_ADDRESS="${{ env.INTEGRATION_TEST_CLOUDSYNC_ADDRESS }}" \
-e INTEGRATION_TEST_OFFLINE_DATABASE_ID="${{ env.INTEGRATION_TEST_OFFLINE_DATABASE_ID }}" \
-e INTEGRATION_TEST_FAILURE_DATABASE_ID="${{ env.INTEGRATION_TEST_FAILURE_DATABASE_ID }}" \
+ -e INTEGRATION_TEST_CHUNKED_DATABASE_ID="${{ env.INTEGRATION_TEST_CHUNKED_DATABASE_ID }}" \
alpine:latest \
tail -f /dev/null
docker exec alpine sh -c "apk update && apk add --no-cache gcc make curl sqlite openssl-dev musl-dev linux-headers"
@@ -212,6 +226,7 @@ jobs:
export INTEGRATION_TEST_CLOUDSYNC_ADDRESS="$INTEGRATION_TEST_CLOUDSYNC_ADDRESS"
export INTEGRATION_TEST_OFFLINE_DATABASE_ID="$INTEGRATION_TEST_OFFLINE_DATABASE_ID"
export INTEGRATION_TEST_FAILURE_DATABASE_ID="$INTEGRATION_TEST_FAILURE_DATABASE_ID"
+ export INTEGRATION_TEST_CHUNKED_DATABASE_ID="$INTEGRATION_TEST_CHUNKED_DATABASE_ID"
$(make test PLATFORM=$PLATFORM ARCH=$ARCH -n)
EOF
echo "::endgroup::"
diff --git a/API.md b/API.md
index 8e6e8250..9c3419f2 100644
--- a/API.md
+++ b/API.md
@@ -1,11 +1,12 @@
# API Reference
-This document provides a reference for the SQLite functions provided by the `sqlite-sync` extension.
+This document provides a reference for the SQL functions provided by the `sqlite-sync` extension. Unless noted otherwise, the APIs are available on both SQLite and PostgreSQL builds.
## Index
- [Configuration Functions](#configuration-functions)
- [`cloudsync_init()`](#cloudsync_inittable_name-crdt_algo-init_flags)
+ - [`cloudsync_set()`](#cloudsync_setkey-value)
- [`cloudsync_enable()`](#cloudsync_enabletable_name)
- [`cloudsync_disable()`](#cloudsync_disabletable_name)
- [`cloudsync_is_enabled()`](#cloudsync_is_enabledtable_name)
@@ -21,16 +22,24 @@ This document provides a reference for the SQLite functions provided by the `sql
- [`cloudsync_siteid()`](#cloudsync_siteid)
- [`cloudsync_db_version()`](#cloudsync_db_version)
- [`cloudsync_uuid()`](#cloudsync_uuid)
+ - [`cloudsync_uuid_text()`](#cloudsync_uuid_textuuid-dash_format)
+ - [`cloudsync_uuid_blob()`](#cloudsync_uuid_blobuuid)
- [Schema Alteration Functions](#schema-alteration-functions)
- [`cloudsync_begin_alter()`](#cloudsync_begin_altertable_name)
- [`cloudsync_commit_alter()`](#cloudsync_commit_altertable_name)
+- [Payload Functions](#payload-functions)
+ - [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq)
+ - [`cloudsync_payload_blob_checked()`](#cloudsync_payload_blob_checkedsince_db_version-since_seq-filter_site_id-exclude_filter_site_id-max_estimated_payload_size)
+ - [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id)
+ - [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload)
- [Network Functions](#network-functions)
- [`cloudsync_network_init()`](#cloudsync_network_initmanageddatabaseid)
- [`cloudsync_network_cleanup()`](#cloudsync_network_cleanup)
- [`cloudsync_network_set_token()`](#cloudsync_network_set_tokentoken)
- [`cloudsync_network_set_apikey()`](#cloudsync_network_set_apikeyapikey)
- [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes)
- - [`cloudsync_network_check_changes()`](#cloudsync_network_check_changes)
+ - [`cloudsync_network_receive_changes()`](#cloudsync_network_receive_changesmax_chunks)
+ - [`cloudsync_network_check_changes()`](#cloudsync_network_check_changesmax_chunks) (deprecated)
- [`cloudsync_network_sync()`](#cloudsync_network_syncwait_ms-max_retries)
- [`cloudsync_network_reset_sync_version()`](#cloudsync_network_reset_sync_version)
- [`cloudsync_network_has_unsent_changes()`](#cloudsync_network_has_unsent_changes)
@@ -40,6 +49,37 @@ This document provides a reference for the SQLite functions provided by the `sql
## Configuration Functions
+### `cloudsync_set(key, value)`
+
+**Description:** Stores a global CloudSync setting in the current database. Settings persist across database reopens and are loaded automatically by the extension.
+
+The following payload setting is supported:
+
+| Key | Description | Default | Minimum | Maximum |
+|---|---|---:|---:|---:|
+| `payload_max_chunk_size` | Maximum transport payload size generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id). Values outside the range are clamped. | `5242880` (5 MB) | `262144` (256 KB) | `33554432` (32 MB) |
+
+`payload_max_chunk_size` affects only chunk generation. [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) continues to accept legacy payloads, monolithic payloads, and v3 chunk-fragment payloads even when they are larger than the local setting. This preserves compatibility between peers using different settings.
+
+**Parameters:**
+
+- `key` (TEXT): The setting key.
+- `value` (TEXT): The setting value. For `payload_max_chunk_size`, pass the value in bytes.
+
+**Returns:** SQLite returns no value. PostgreSQL returns `true` on success.
+
+**Example:**
+
+```sql
+-- Use 1 MB transport chunks
+SELECT cloudsync_set('payload_max_chunk_size', '1048576');
+
+-- Restore the default 5 MB transport chunks
+SELECT cloudsync_set('payload_max_chunk_size', '5242880');
+```
+
+---
+
### `cloudsync_init(table_name, [crdt_algo], [init_flags])`
**Description:** Initializes a table for `sqlite-sync` synchronization. This function is idempotent and needs to be called only once per table on each site; configurations are stored in the database and automatically loaded with the extension.
@@ -363,6 +403,45 @@ INSERT INTO products (id, name) VALUES (cloudsync_uuid(), 'New Product');
---
+### `cloudsync_uuid_text(uuid, [dash_format])`
+
+**Description:** Converts a 16-byte binary UUID (such as the `site_id` stored in `cloudsync_changes`, or the value returned by [`cloudsync_siteid()`](#cloudsync_siteid)) into its canonical string form.
+
+**Parameters:**
+
+- `uuid` (BLOB/BYTEA): The 16-byte UUID. Returns `NULL` if `uuid` is `NULL`; raises an error if it is not exactly 16 bytes.
+- `dash_format` (BOOLEAN, optional, default `true`): When `true`, returns the canonical 36-character dashed form (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`); when `false`, returns the bare 32-character hex form.
+
+**Returns:** The UUID as a TEXT value (lowercase hex).
+
+**Example:**
+
+```sql
+SELECT cloudsync_uuid_text(cloudsync_siteid()); -- 0190a1b2-c3d4-7e5f-8a9b-001122334455
+SELECT cloudsync_uuid_text(cloudsync_siteid(), false); -- 0190a1b2c3d47e5f8a9b001122334455
+```
+
+---
+
+### `cloudsync_uuid_blob(uuid)`
+
+**Description:** Converts a UUID string into its 16-byte binary form. This is the inverse of [`cloudsync_uuid_text()`](#cloudsync_uuid_textuuid-dash_format) and lets string-based callers (for example, an HTTP `/check` endpoint holding a stringified `site_id`) pass a `site_id` to [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id).
+
+**Parameters:**
+
+- `uuid` (TEXT): A UUID string. Tolerant: accepts the canonical dashed form and the bare 32-hex form, case-insensitive. Returns `NULL` if `uuid` is `NULL`; raises an error on malformed input.
+
+**Returns:** The 16-byte UUID as a BLOB/BYTEA.
+
+**Example:**
+
+```sql
+SELECT cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455');
+SELECT cloudsync_uuid_blob('0190A1B2C3D47E5F8A9B001122334455');
+```
+
+---
+
## Schema Alteration Functions
### `cloudsync_begin_alter(table_name)`
@@ -409,6 +488,185 @@ SELECT cloudsync_commit_alter('my_table');
---
+## Payload Functions
+
+### `cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq)`
+
+**Description:** Encodes rows from `cloudsync_changes` into a single monolithic payload. This is the legacy payload API and remains fully supported for backward compatibility.
+
+Use this API when the expected payload size is modest or when you need to interoperate with callers that expect a single BLOB. For large rowsets or large individual BLOB/TEXT values, prefer [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id), which splits transport payloads according to `payload_max_chunk_size`.
+
+**Parameters:** The function is an aggregate over the columns returned by `cloudsync_changes`:
+
+- `tbl` (TEXT): Source table name.
+- `pk` (BLOB): Encoded primary key.
+- `col_name` (TEXT): Changed column name.
+- `col_value` (BLOB): Encoded column value.
+- `col_version` (INTEGER/BIGINT): Column version.
+- `db_version` (INTEGER/BIGINT): Source database version.
+- `site_id` (BLOB): Source site identifier.
+- `cl` (INTEGER/BIGINT): Causal length.
+- `seq` (INTEGER/BIGINT): Sequence number within the source database version.
+
+**Returns:** A single payload BLOB.
+
+**Example:**
+
+```sql
+SELECT cloudsync_payload_encode(
+ tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq
+) AS payload
+FROM cloudsync_changes;
+```
+
+---
+
+### `cloudsync_payload_blob_checked(since_db_version, since_seq, filter_site_id, exclude_filter_site_id, max_estimated_payload_size)`
+
+**Description:** Generates one monolithic payload BLOB for a selected change window, but only after an internal conservative size check passes.
+
+This helper is intended for servers that still need to support old clients on a `/check` path that expects one monolithic payload. It keeps the server to a single SQL round trip while avoiding payload allocation when the selected window is too large.
+
+Internally, CloudSync first estimates the uncompressed payload body plus header. If that estimate exceeds `max_estimated_payload_size`, the function raises a limit-exceeded error and does not materialize the payload. Empty windows return `NULL`.
+
+When the check passes, this function scans the selected change window twice: once to estimate and once to encode. This avoids unsafe monolithic allocation, but successful calls are more I/O-expensive than a direct single-pass `cloudsync_payload_encode()` over the same rows.
+
+**Parameters:**
+
+- `since_db_version` (INTEGER/BIGINT): Exclusive lower bound on the source database version. Rows at exactly this version are still included when their `seq` is greater than `since_seq`.
+- `since_seq` (INTEGER/BIGINT): Sequence cursor within `since_db_version`.
+- `filter_site_id` (BLOB): Site ID to filter on. With `exclude_filter_site_id` unset/`false` it selects changes **from** this site; with `exclude_filter_site_id` `true` it selects changes from every site **except** this one. If `NULL` and not excluding, CloudSync uses the local site ID.
+- `exclude_filter_site_id` (BOOLEAN): When `true`, encode changes from all sites **except** `filter_site_id`. Setting it `true` without a `filter_site_id` is an error.
+- `max_estimated_payload_size` (INTEGER/BIGINT): Maximum allowed conservative payload-size estimate, in bytes. Must be positive.
+
+**Returns:** A monolithic payload BLOB, or `NULL` when no changes match.
+
+**Example:**
+
+```sql
+-- /check download guard: all changes EXCEPT the requesting peer's site
+SELECT cloudsync_payload_blob_checked(
+ 100,
+ 0,
+ cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455'),
+ true,
+ 10485760
+);
+```
+
+---
+
+### `cloudsync_payload_chunks([since_db_version], [filter_site_id], [until_db_version], [exclude_filter_site_id])`
+
+**Description:** Generates sync payloads as a stream of transport-sized chunks. It is the chunk-aware evolution of [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq), designed for large rowsets and for single BLOB/TEXT values that are larger than the configured chunk size.
+
+The maximum generated chunk size is controlled by the global `payload_max_chunk_size` setting. The default is 5 MB, the technical minimum is 256 KB, and the technical maximum is 32 MB (values outside this range are clamped):
+
+```sql
+SELECT cloudsync_set('payload_max_chunk_size', '5242880');
+```
+
+When a single encoded column value does not fit in one chunk, CloudSync transparently emits v3 payload fragments for that value. The receiver stages fragments internally and applies the value when all parts arrive. Fragments can arrive out of order; incomplete stale fragment groups are cleaned up automatically.
+
+`cloudsync_payload_chunks()` does not change the apply contract: [`cloudsync_payload_apply()`](#cloudsync_payload_applypayload) accepts legacy payloads, monolithic payloads, and v3 chunk-fragment payloads. The local `payload_max_chunk_size` setting is not used to reject incoming payloads.
+
+**Important memory note:** chunking limits the size of each transport payload that CloudSync generates. It does not remove the database engine's need to materialize a single final cell value when applying a very large BLOB/TEXT column. In other words, a 500 MB BLOB can be transported in smaller chunks, but the receiving database must still be able to store and bind the completed 500 MB value when that row is applied.
+
+**Parameters:**
+
+- `since_db_version` (INTEGER/BIGINT, optional): Start after this source database version. If omitted, CloudSync uses the stored send checkpoint.
+- `filter_site_id` (BLOB, optional): Site ID to filter on. With `exclude_filter_site_id` unset/`false` it selects changes **from** this site; with `exclude_filter_site_id` `true` it selects changes from every site **except** this one. If omitted (and not excluding), CloudSync uses the local site ID.
+- `until_db_version` (INTEGER/BIGINT, optional): Upper watermark to include. If omitted or `0`, CloudSync captures the current maximum source database version before streaming chunks.
+- `exclude_filter_site_id` (BOOLEAN, optional, default `false`): When `true`, stream changes from all sites **except** `filter_site_id`. This is what the `/check` download path needs — a peer must not receive its own changes back. Setting it `true` without a `filter_site_id` is an error. The site_id stored in `cloudsync_changes` is the 16-byte binary UUID; string callers can convert with [`cloudsync_uuid_blob()`](#cloudsync_uuid_blobuuid).
+
+**Returns:** A rowset with one row per chunk:
+
+| Column | Description |
+|---|---|
+| `payload` | Payload BLOB to pass to `cloudsync_payload_apply()`. |
+| `chunk_index` | Zero-based chunk index for this stream. |
+| `payload_size` | Payload size in bytes. |
+| `rows` | Number of encoded payload rows in this chunk. Fragment chunks usually contain one fragment row. |
+| `db_version_min` | Minimum source `db_version` represented by this chunk. |
+| `db_version_max` | Maximum source `db_version` represented by this chunk. |
+| `watermark_db_version` | Stable upper watermark captured for this chunk stream. Store this after all chunks are durably transferred/applied. |
+
+**SQLite usage:** `cloudsync_payload_chunks` is exposed as a virtual table with hidden constraint columns:
+
+```sql
+-- Default: uses the stored send checkpoint and local site id
+SELECT payload, chunk_index, payload_size, watermark_db_version
+FROM cloudsync_payload_chunks
+ORDER BY chunk_index;
+
+-- Explicit arguments through hidden columns
+SELECT payload, chunk_index, payload_size, watermark_db_version
+FROM cloudsync_payload_chunks
+WHERE since_db_version = 100
+ AND site_id = cloudsync_siteid()
+ AND until_db_version = 200
+ORDER BY chunk_index;
+
+-- /check download: all changes EXCEPT the requesting peer's site
+SELECT payload, chunk_index, watermark_db_version
+FROM cloudsync_payload_chunks
+WHERE since_db_version = 100
+ AND site_id = cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455')
+ AND exclude_filter_site_id = 1
+ORDER BY chunk_index;
+```
+
+**PostgreSQL usage:** `cloudsync_payload_chunks` is exposed as a set-returning function with optional arguments:
+
+```sql
+-- Default: uses the stored send checkpoint and local site id
+SELECT *
+FROM cloudsync_payload_chunks();
+
+-- Explicit arguments
+SELECT *
+FROM cloudsync_payload_chunks(100, cloudsync_siteid(), 200);
+
+-- /check download: all changes EXCEPT the requesting peer's site
+SELECT *
+FROM cloudsync_payload_chunks(100, cloudsync_uuid_blob('0190a1b2-c3d4-7e5f-8a9b-001122334455'), NULL, true);
+```
+
+**Apply example:**
+
+```sql
+-- Apply chunks on a receiving peer. Chunks may be applied one at a time.
+SELECT cloudsync_payload_apply(?);
+```
+
+On PostgreSQL, apply chunks as individual statements from the transport/client layer. Do not use a set-based statement such as `SELECT cloudsync_payload_apply(payload) FROM chunks_table;` while reading payloads from a table in the same database session. `cloudsync_payload_apply()` performs writes through SPI, and applying while the same statement is still scanning a payload table can conflict with PostgreSQL executor resource ownership. Fetch each payload into the client (or into a local procedural variable after the read completes) and then call `cloudsync_payload_apply()` for that single payload.
+
+---
+
+### `cloudsync_payload_apply(payload)`
+
+**Description:** Applies a sync payload to the current database. The function accepts all supported payload formats:
+
+- Legacy payloads generated by older SQLite Sync versions.
+- Monolithic payloads generated by [`cloudsync_payload_encode()`](#cloudsync_payload_encodetbl-pk-col_name-col_value-col_version-db_version-site_id-cl-seq).
+- Chunk-fragment payloads generated by [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id).
+
+When a v3 fragment payload is received, CloudSync stores the fragment in an internal table and returns after applying zero or more completed values. Once the final fragment for a value is received, the completed value is validated and applied. Duplicate fragment delivery is idempotent.
+
+**Parameters:**
+
+- `payload` (BLOB/BYTEA): Payload BLOB to apply.
+
+**Returns:** Number of payload rows applied. Fragment payloads that are staged but not yet complete can return `0`.
+
+**Example:**
+
+```sql
+SELECT cloudsync_payload_apply(:payload);
+```
+
+---
+
## Network Functions
### `cloudsync_network_init(managedDatabaseId)`
@@ -500,112 +758,146 @@ This means: if you get JSON back, the server was reachable and the network proto
**Description:** Sends all unsent local changes to the remote server.
+The send path streams payloads through [`cloudsync_payload_chunks()`](#cloudsync_payload_chunkssince_db_version-filter_site_id-until_db_version-exclude_filter_site_id), so `payload_max_chunk_size` also limits the payloads generated for network transport. Each generated chunk is uploaded/applied independently; the local send checkpoint is advanced only after the chunk stream completes successfully.
+
+Chunk transport is transparent to the CloudSync backend. Each chunk is sent as a normal `/apply` payload, either inline as a base64 `blob` or through the upload `url` path. There is no separate chunk flag: legacy payloads, monolithic payloads, and v3 fragment payloads are distinguished by the payload format itself.
+
**Parameters:** None.
**Returns:** A JSON string with the send result:
```json
-{"send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N, "lastFailure": {...}}}
+{"send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N, "chunks": C, "bytes": B, "lastFailure": {...}}}
```
- `send.status`: The current sync state — `"synced"` (all changes confirmed), `"syncing"` (changes sent but not yet confirmed), `"out-of-sync"` (local changes pending or gaps detected), or `"error"`.
- `send.localVersion`: The latest local database version.
- `send.serverVersion`: The latest version confirmed by the server.
-- `send.lastFailure` (optional): Present only when the server reports a failed apply job. Forwarded verbatim from the server's `failures.apply` and typically includes `jobId`, `code`, `stage`, `message`, `retryable`, and `failedAt`. It is emitted regardless of `status` so callers can detect server-side failures during `"syncing"` or even after the state has nominally recovered. This function is **send/apply-scoped**: server-reported check-job failures (`failures.check`) are not surfaced here — see [`cloudsync_network_check_changes()`](#cloudsync_network_check_changes) and [`cloudsync_network_sync()`](#cloudsync_network_sync).
+- `send.chunks`: The number of payload chunks sent this call (a large push is split into multiple transport chunks bounded by `payload_max_chunk_size`). `0` when there were no local changes to send.
+- `send.bytes`: The total serialized payload bytes sent this call (uncompressed cloudsync payload size, summed across chunks; not the compressed wire size).
+- `send.lastFailure` (optional): Present only when the server reports a failed apply job. Forwarded verbatim from the server's `failures.apply` and typically includes `jobId`, `code`, `stage`, `message`, `retryable`, and `failedAt`. It is emitted regardless of `status` so callers can detect server-side failures during `"syncing"` or even after the state has nominally recovered. This function is **send/apply-scoped**: server-reported check-job failures (`failures.check`) are not surfaced here — see [`cloudsync_network_receive_changes()`](#cloudsync_network_receive_changesmax_chunks) and [`cloudsync_network_sync()`](#cloudsync_network_syncwait_ms-max_retries).
**Example:**
```sql
SELECT cloudsync_network_send_changes();
--- '{"send":{"status":"synced","localVersion":5,"serverVersion":5}}'
+-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5,"chunks":1,"bytes":2048}}'
-- With a server-reported failure (e.g. unknown schema hash on the server side):
--- '{"send":{"status":"out-of-sync","localVersion":1,"serverVersion":0,"lastFailure":{"jobId":44961,"code":"internal_error","stage":"apply_payload","message":"cloudsync operation failed: Cannot apply the received payload because the schema hash is unknown 4288148391734624266.","retryable":true,"failedAt":"2026-04-15T22:21:09.018606Z"}}}'
+-- '{"send":{"status":"out-of-sync","localVersion":1,"serverVersion":0,"chunks":1,"bytes":512,"lastFailure":{"jobId":44961,"code":"internal_error","stage":"apply_payload","message":"cloudsync operation failed: Cannot apply the received payload because the schema hash is unknown 4288148391734624266.","retryable":true,"failedAt":"2026-04-15T22:21:09.018606Z"}}}'
```
---
-### `cloudsync_network_check_changes()`
+### `cloudsync_network_receive_changes([max_chunks])`
+
+**Description:** Receives new changes from the remote server and applies them to the local database. (Formerly `cloudsync_network_check_changes()`, which remains available as a deprecated alias — see below.)
-**Description:** Checks the remote server for new changes and applies them to the local database.
+If changes are already prepared for the local site, they are downloaded and applied. If nothing is ready yet, the server starts preparing a package asynchronously and this call returns having applied nothing; a later call retrieves it. This function does **not** wait/poll for preparation to finish — it applies what is available now. To force an update and wait for not-yet-ready changes, use [`cloudsync_network_sync(wait_ms, max_retries)`](#cloudsync_network_syncwait_ms-max_retries).
-If a package of new changes is already available for the local site, the server returns it immediately, and the changes are applied. If no package is ready, the server returns an empty response and starts an asynchronous process to prepare a new package. This new package can be retrieved with a subsequent call to this function.
+By default this function **drains all currently-available chunks** in one call. Pass `max_chunks` to cap how many chunks are applied per call, for caller-driven progress or traffic control:
-This function is designed to be called periodically to keep the local database in sync.
-To force an update and wait for changes (with a timeout), use [`cloudsync_network_sync(wait_ms, max_retries)`].
+```sql
+-- Drain at most 5 chunks, loop until the stream is complete
+SELECT cloudsync_network_receive_changes(5) ->> '$.receive.complete';
+```
+
+The drain position (the per-stream page cursor) is held **in memory** on the network context, so a capped drain resumes where it left off on the next call — the caller does not manage any cursor; it just loops while `receive.complete` is `false`. If the connection is closed or the process restarts mid-drain, the cursor is lost and the next call safely restarts the drain from the beginning of the stream: already-applied chunks are re-downloaded and re-applied idempotently, so **no rows are skipped** — the only cost is the redundant download. This is safe because the durable receive checkpoint (`check_dbversion`/`check_seq`) advances only after a stream has been **fully** applied, never in the middle of a source `db_version`.
If the network is misconfigured or the remote server is unreachable, the function raises a SQL error. If the received payload cannot be applied locally (for example because of an unknown schema hash), the error is returned as a `receive.error` field in the JSON response. If the server reports an unresolved failed check job (e.g. an `encode_changes` failure), that failure is forwarded as a `receive.lastFailure` object.
-**Parameters:** None.
+**Parameters:**
+
+- `max_chunks` (INTEGER, optional): Maximum number of chunks to apply in this call. Omit it, or pass `0` or a negative value, to drain everything available. A positive value caps the drain; `receive.complete` is `false` when the cap stops a drain that still has pending chunks.
**Returns:** A JSON string with the receive result:
```json
-{"receive": {"rows": N, "tables": ["table1", "table2"], "error": "...", "lastFailure": {...}}}
+{"receive": {"rows": N, "tables": ["table1", "table2"], "chunks": C, "bytes": B, "complete": true, "error": "...", "lastFailure": {...}}}
```
-- `receive.rows`: The number of rows received and applied to the local database. `0` when the receive phase failed.
-- `receive.tables`: An array of table names that received changes. Empty (`[]`) if no changes were applied or the receive phase failed.
+- `receive.rows`: The total number of rows received and applied to the local database, summed across all chunks drained this call. `0` when the receive phase failed, when nothing was available, or when only intermediate fragments were staged without completing a value.
+- `receive.tables`: An array of table names that received changes (the union across all drained chunks). Empty (`[]`) if no changes were applied or the receive phase failed.
+- `receive.chunks`: The number of payload chunks applied by this call. `0` when nothing was ready, `1` for a single monolithic/inline page, and `N` for a drained `N`-chunk stream (bounded by `max_chunks` if given).
+- `receive.bytes`: The total serialized payload bytes received this call (uncompressed cloudsync payload size, summed across chunks; transport-independent, not the compressed wire size). Useful for byte-budgeted draining together with `max_chunks`.
+- `receive.complete` (boolean): `true` when the receive stream is fully drained (nothing pending), `false` when more chunks remain — because `max_chunks` capped the drain, or it stopped early. When `false`, call this function again to continue.
- `receive.error` (optional, string): Present when client-side `cloudsync_payload_apply` failed. Contains a human-readable error message describing why the received payload could not be applied.
-- `receive.lastFailure` (optional, object): Present only when the server reports a failed check job. Forwarded verbatim from the server's `failures.check` and typically includes `jobId`, `dbVersion`, `seq`, `code`, `stage`, `message`, `retryable`, and `failedAt`. Distinct from `receive.error`: `receive.error` describes a client-side apply failure (string), while `receive.lastFailure` describes a server-side check-job failure (object). Both can coexist in the same response. This function is **check-scoped**: server-reported apply-job failures (`failures.apply`) are not surfaced here — see [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes) and [`cloudsync_network_sync()`](#cloudsync_network_sync).
+- `receive.lastFailure` (optional, object): Present only when the server reports a failed check job. Forwarded verbatim from the server's `failures.check` and typically includes `jobId`, `dbVersion`, `seq`, `code`, `stage`, `message`, `retryable`, and `failedAt`. Distinct from `receive.error`: `receive.error` describes a client-side apply failure (string), while `receive.lastFailure` describes a server-side check-job failure (object). Both can coexist in the same response. This function is **check-scoped**: server-reported apply-job failures (`failures.apply`) are not surfaced here — see [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes) and [`cloudsync_network_sync()`](#cloudsync_network_syncwait_ms-max_retries).
**Example:**
```sql
-SELECT cloudsync_network_check_changes();
--- '{"receive":{"rows":3,"tables":["tasks"]}}'
+SELECT cloudsync_network_receive_changes();
+-- '{"receive":{"rows":3,"tables":["tasks"],"chunks":1,"bytes":820,"complete":true}}'
+
+-- Capped drain with more pending (call again to continue):
+-- '{"receive":{"rows":40,"tables":["docs"],"chunks":5,"bytes":1310720,"complete":false}}'
-- With a client-side apply error:
--- '{"receive":{"rows":0,"tables":[],"error":"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."}}'
+-- '{"receive":{"rows":0,"tables":[],"chunks":0,"bytes":0,"complete":false,"error":"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."}}'
-- With a server-reported check-job failure:
--- '{"receive":{"rows":0,"tables":[],"lastFailure":{"jobId":456,"dbVersion":15,"seq":1,"code":"tenant_unreachable","stage":"encode_changes","message":"tenant check failed","retryable":true,"failedAt":"2026-04-24T10:22:00Z"}}}'
+-- '{"receive":{"rows":0,"tables":[],"chunks":0,"bytes":0,"complete":true,"lastFailure":{"jobId":456,"dbVersion":15,"seq":1,"code":"tenant_unreachable","stage":"encode_changes","message":"tenant check failed","retryable":true,"failedAt":"2026-04-24T10:22:00Z"}}}'
```
---
+### `cloudsync_network_check_changes([max_chunks])`
+
+> **Deprecated:** use [`cloudsync_network_receive_changes()`](#cloudsync_network_receive_changesmax_chunks). This name is retained as a thin alias for backward compatibility and will be removed in a future major version. It behaves identically, including the optional `max_chunks` argument and all returned fields.
+
+---
+
### `cloudsync_network_sync([wait_ms], [max_retries])`
**Description:** Performs a full synchronization cycle. This function has two overloads:
- `cloudsync_network_sync()`: Performs one send operation and one check operation.
-- `cloudsync_network_sync(wait_ms, max_retries)`: Performs one send operation and then repeatedly tries to download remote changes until at least one change is downloaded or `max_retries` times has been reached, waiting `wait_ms` between retries.
+- `cloudsync_network_sync(wait_ms, max_retries)`: Performs one send operation and then downloads remote changes.
+
+When the server delivers changes as a stream of chunks, this function drains the **whole stream in a single call**: as long as the next chunk is already available it is fetched back-to-back with no delay. `wait_ms` and `max_retries` are spent only while the server payload is **not yet ready** (the server is still preparing a package): in that case the function waits `wait_ms` and retries up to `max_retries` times. They are not consumed while paging through chunks that are already available.
**Parameters:**
-- `wait_ms` (INTEGER, optional): The time to wait in milliseconds between retries. Defaults to 100.
-- `max_retries` (INTEGER, optional): The maximum number of times to retry the synchronization. Defaults to 1.
+- `wait_ms` (INTEGER, optional): The time to wait in milliseconds between retries while the server payload is not yet ready. Defaults to 100.
+- `max_retries` (INTEGER, optional): The maximum number of poll attempts while the server payload is not yet ready. Defaults to 1.
**Returns:** A JSON string with the full sync result, combining send and receive:
```json
{
- "send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N, "lastFailure": {...}},
- "receive": {"rows": N, "tables": ["table1", "table2"], "error": "...", "lastFailure": {...}}
+ "send": {"status": "synced|syncing|out-of-sync|error", "localVersion": N, "serverVersion": N, "chunks": C, "bytes": B, "lastFailure": {...}},
+ "receive": {"rows": N, "tables": ["table1", "table2"], "chunks": C, "bytes": B, "complete": true, "error": "...", "lastFailure": {...}}
}
```
- `send.status`: The current sync state — `"synced"`, `"syncing"`, `"out-of-sync"`, or `"error"`.
- `send.localVersion`: The latest local database version.
- `send.serverVersion`: The latest version confirmed by the server.
+- `send.chunks` / `send.bytes`: Number of payload chunks sent and total serialized payload bytes sent during the send phase. Same semantics as in [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes).
- `send.lastFailure` (optional): Same semantics as in [`cloudsync_network_send_changes()`](#cloudsync_network_send_changes) — forwarded verbatim from the server's `failures.apply` whenever a failed apply job is reported, regardless of `status`.
-- `receive.rows`: The number of rows received and applied during the check phase. `0` when the receive phase failed.
-- `receive.tables`: An array of table names that received changes. Empty (`[]`) if no changes were applied or the receive phase failed.
-- `receive.error` (optional, string): Present when client-side `cloudsync_payload_apply` failed (for example `"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."`). The send result is always preserved so the caller can tell that local changes reached the server even when applying incoming changes failed. The retry loop breaks immediately on apply errors, since failures like schema-hash mismatches do not heal across retries. Endpoint/network errors during the receive phase raise a SQL error instead.
-- `receive.lastFailure` (optional, object): Same semantics as in [`cloudsync_network_check_changes()`](#cloudsync_network_check_changes) — forwarded verbatim from the server's `failures.check` whenever a failed check job is reported. Distinct from `receive.error`. `cloudsync_network_sync()` reports both `send.lastFailure` and `receive.lastFailure` when present.
+- `receive.rows`: The **total** number of rows received and applied during the receive phase, summed across **all** chunks drained in this call. `0` when the receive phase failed.
+- `receive.tables`: An array of table names that received changes (the union across all drained chunks). Empty (`[]`) if no changes were applied or the receive phase failed.
+- `receive.chunks`: The number of payload chunks applied in this call. `0` when nothing was ready, `1` for a single monolithic/inline page, and `N` for a fully drained `N`-chunk stream. `cloudsync_network_sync()` always drains the whole stream (it does not cap chunks).
+- `receive.bytes`: The total serialized payload bytes received this call (uncompressed cloudsync payload size, summed across chunks; not the compressed wire size). Same semantics as in [`cloudsync_network_receive_changes()`](#cloudsync_network_receive_changesmax_chunks).
+- `receive.complete` (boolean): `true` when the server stream was fully drained, `false` when the download stopped before the final chunk (an error occurred, or an internal safety bound was reached). When `false`, call `cloudsync_network_sync()` again to resume; re-delivered rows are idempotent.
+- `receive.error` (optional, string): Present when client-side `cloudsync_payload_apply` failed (for example `"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."`). The send result is always preserved so the caller can tell that local changes reached the server even when applying incoming changes failed. The receive drain stops immediately on apply errors, since failures like schema-hash mismatches do not heal across retries. Endpoint/network errors during the receive phase raise a SQL error instead.
+- `receive.lastFailure` (optional, object): Same semantics as in [`cloudsync_network_receive_changes()`](#cloudsync_network_receive_changesmax_chunks) — forwarded verbatim from the server's `failures.check` whenever a failed check job is reported. Distinct from `receive.error`. `cloudsync_network_sync()` reports both `send.lastFailure` and `receive.lastFailure` when present.
**Example:**
```sql
-- Perform a single synchronization cycle
SELECT cloudsync_network_sync();
--- '{"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":3,"tables":["tasks"]}}'
+-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5,"chunks":1,"bytes":2048},"receive":{"rows":3,"tables":["tasks"],"chunks":1,"bytes":820,"complete":true}}'
-- Perform a synchronization cycle with custom retry settings
SELECT cloudsync_network_sync(500, 3);
+-- A large download drained as a multi-chunk stream in a single call:
+-- '{"send":{"status":"synced","localVersion":42,"serverVersion":42,"chunks":0,"bytes":0},"receive":{"rows":1200,"tables":["docs"],"chunks":7,"bytes":1835008,"complete":true}}'
-- Receive phase failed but send phase completed — the error is surfaced in JSON, not as a SQL error:
--- '{"send":{"status":"synced","localVersion":5,"serverVersion":5},"receive":{"rows":0,"tables":[],"error":"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."}}'
+-- '{"send":{"status":"synced","localVersion":5,"serverVersion":5,"chunks":1,"bytes":512},"receive":{"rows":0,"tables":[],"chunks":0,"bytes":0,"complete":false,"error":"Cannot apply the received payload because the schema hash is unknown 7218827471400075525."}}'
```
---
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 158a6ff2..f9c37640 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,38 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+## [1.1.0] - Unreleased
+
+### Added
+
+- **Chunked payload generation** via `cloudsync_payload_chunks()`, available as a SQLite virtual table and as a PostgreSQL set-returning function. The API emits transport-sized payload chunks and transparently fragments oversized BLOB/TEXT values into v3 fragment payloads.
+- **`payload_max_chunk_size` global setting** for controlling generated chunk size. The default is 5 MB and values are clamped to the 256 KB technical minimum and the 32 MB technical maximum.
+- **`exclude_filter_site_id` argument** for `cloudsync_payload_chunks()`. When set, the function streams changes from every site **except** `filter_site_id`, which is what the `/check` download path needs (a peer must not receive its own changes back). The default (omitted/`false`) preserves the existing single-site behavior. Passing the flag without a `filter_site_id` is an error.
+- **`cloudsync_payload_blob_checked()`** scalar function on both SQLite and PostgreSQL. It performs an internal conservative size check before generating one monolithic legacy payload BLOB, allowing `/check` endpoints to support old clients with one SQL round trip while rejecting unsafe responses before payload materialization.
+- **`cloudsync_uuid_text()` / `cloudsync_uuid_blob()`** scalar functions on both SQLite and PostgreSQL, converting between the 16-byte binary `site_id` and its canonical UUID string. `cloudsync_uuid_text()` takes an optional `dash_format` argument (default `true`); `cloudsync_uuid_blob()` accepts dashed or undashed, case-insensitive input. These let string-based callers (e.g. the `/check` endpoint) pass a `site_id` to `cloudsync_payload_chunks()`.
+- **Payload chunking documentation** in `API.md` and `PERFORMANCE.md`, including the explicit memory note that chunking bounds transport payloads but the database must still materialize a completed single BLOB/TEXT value when it is applied.
+- **PostgreSQL `1.0 -> 1.1` upgrade script** (`migrations/cloudsync--1.0--1.1.sql`) for the new chunked-payload SQL surface, so existing deployments can `ALTER EXTENSION cloudsync UPDATE`.
+- **`cloudsync_network_receive_changes([max_chunks])`** as the canonical name for receiving changes (the old `cloudsync_network_check_changes()` is retained as a deprecated alias). It drains all currently-available chunks by default; the optional `max_chunks` argument caps how many chunks are applied per call for caller-driven progress/traffic control (the in-memory page cursor persists between calls, so a capped drain resumes where it left off).
+- **`chunks` and `bytes` fields** in the `send` and `receive` JSON of the network functions, plus **`complete`** in `receive`. `chunks` is the number of payload chunks sent/applied; `bytes` is the total serialized (uncompressed) payload size in bytes; `complete` is `false` when a chunked download stopped before the final chunk, signalling the caller to call again.
+
+### Deprecated
+
+- **`cloudsync_network_check_changes([max_chunks])`** — use `cloudsync_network_receive_changes()` instead. The old name remains a thin, fully-functional alias and will be removed in a future major version.
+
+### Changed
+
+- `cloudsync_network_receive_changes()` (and its `cloudsync_network_check_changes()` alias) now **drains all available chunks** in one call (it does not wait for not-yet-ready server preparation). Previously each call fetched a single page.
+- `cloudsync_network_sync(wait_ms, max_retries)` now drains an entire chunked download in a **single call**, fetching already-available chunks back-to-back with no delay. `wait_ms` / `max_retries` are now spent only while the server payload is not yet ready (HTTP 202), not while paging through chunks that are already available. Previously a multi-chunk stream required several `cloudsync_network_sync()` calls and wasted a `wait_ms` delay on each staged fragment.
+- `receive.rows` is now the **cumulative** number of rows applied across all chunks drained in the call (previously only the last page was reported); `receive.tables` likewise reports the union of tables touched across the whole drain.
+- `cloudsync_payload_apply()` now accepts legacy payloads, monolithic payloads, and v3 fragment payloads without enforcing the local `payload_max_chunk_size`, preserving compatibility between peers with different settings.
+- `cloudsync_network_send_changes()` now streams outgoing changes through `cloudsync_payload_chunks()` instead of first building one monolithic payload. This bounds transport payload size for the built-in network path and lets large rowsets or oversized BLOB/TEXT values flow through the same `/apply` endpoint as regular payloads.
+- The built-in `/check` receive path now advertises `X-CloudSync-Capabilities: check-status-response, check-chunks` and can apply cursor-mode pages returned inline as `data.payload` base64 bytes, in addition to larger pages returned as `data.url` download artifacts. Requests send `cursor`; responses provide `nextCursor` when another page is available.
+- The chunked-download receive path advances the local receive checkpoint (`check_dbversion` / `check_seq`) **only after a chunk stream has been fully applied**, jumping straight to the stream watermark — never into the middle of a source `db_version`. This mirrors the send path and ensures a stop between chunks cannot skip the un-applied rows of a `db_version` split across chunks on the next `/check` (the server resumes on `db_version > since`, with no intra-version cursor). `cloudsync_payload_apply()` no longer advances the receive checkpoint per applied chunk; the built-in network `/check` path drives it from the server's watermark and final-chunk signal, and falls back to the previous monolithic behavior when the server sends no watermark. Re-delivered rows remain idempotent.
+
+### Fixed
+
+- **PostgreSQL backend crash (segfault) on an error raised after `cloudsync_changes_select()`.** The set-returning function returned via `SRF_RETURN_NEXT` / `SRF_RETURN_DONE` from inside its `PG_TRY` block, which skips `PG_END_TRY()` and leaves `PG_exception_stack` pointing at the function's already-returned stack frame. A later `ereport(ERROR)` in the same query — such as the `cloudsync_payload_blob_checked()` size-limit check — then `siglongjmp()`ed into that dead stack frame and crashed the backend. The `SRF_RETURN_*` calls now run after `PG_END_TRY()` so the exception stack is always restored. This was a pre-existing bug, not specific to the chunked-payload work.
+
## [1.0.20] - 2026-05-26
### Changed
diff --git a/Makefile b/Makefile
index 189bf0ff..f9d1acf1 100644
--- a/Makefile
+++ b/Makefile
@@ -78,7 +78,9 @@ FI_SRC = $(FI_DIR)/fractional_indexing.c
# Combined for SQLite extension build
SRC_FILES = $(CORE_SRC) $(SQLITE_SRC) $(FI_SRC)
-TEST_SRC = $(wildcard $(TEST_DIR)/*.c)
+# network_unit.c is built separately with networking ENABLED (see network-unittest),
+# so keep it out of the default OMIT_NETWORK test build.
+TEST_SRC = $(filter-out $(TEST_DIR)/network_unit.c,$(wildcard $(TEST_DIR)/*.c))
TEST_FILES = $(SRC_FILES) $(TEST_SRC) $(wildcard $(SQLITE_DIR)/*.c)
RELEASE_OBJ = $(patsubst %.c, $(BUILD_RELEASE)/%.o, $(notdir $(SRC_FILES)))
TEST_OBJ = $(patsubst %.c, $(BUILD_TEST)/%.o, $(notdir $(TEST_FILES)))
@@ -86,6 +88,25 @@ COV_FILES = $(filter-out $(SRC_DIR)/lz4.c $(NETWORK_DIR)/network.c $(SQLITE_IMPL
CURL_LIB = $(CURL_DIR)/$(PLATFORM)/libcurl.a
TEST_TARGET = $(patsubst %.c,$(DIST_DIR)/%$(EXE), $(notdir $(TEST_SRC)))
+# Network-enabled unit tests: rebuild the codebase with networking ON (T_CFLAGS
+# minus OMIT_NETWORK) and link curl, so network.c's internal functions can be
+# tested directly on in-memory buffers. NT_LDFLAGS reuses the platform LDFLAGS
+# (which carries -lcurl) minus the shared-library-only flags (-shared on Linux,
+# -dynamiclib on macOS) so it links as an executable, plus the test link libs.
+# -undefined dynamic_lookup is kept: the test never opens a connection, so curl's
+# transport symbols are linked but never invoked.
+BUILD_NETTEST = build/nettest
+NT_CFLAGS = $(filter-out -DCLOUDSYNC_OMIT_NETWORK,$(T_CFLAGS))
+NT_LDFLAGS = $(filter-out -shared -dynamiclib -headerpad_max_install_names,$(LDFLAGS)) $(T_LDFLAGS)
+NT_SRC = $(SRC_FILES) $(SQLITE_DIR)/sqlite3.c $(TEST_DIR)/network_unit.c
+NT_OBJ = $(patsubst %.c,$(BUILD_NETTEST)/%.o,$(notdir $(NT_SRC)))
+
+# Build curl hermetically: neutralize the developer's ambient build env so
+# curl's ./configure compile tests aren't broken by overrides leaking in
+# (e.g. exported LDFLAGS/CPPFLAGS/LIBS pointing at Homebrew). Build flags for
+# curl are supplied explicitly via CURL_CONFIG.
+CURL_CONFIG_ENV = LDFLAGS= CPPFLAGS= LIBS= CFLAGS=
+
# Platform-specific settings
ifeq ($(PLATFORM),windows)
TARGET := $(DIST_DIR)/cloudsync.dll
@@ -185,7 +206,7 @@ endif
T_LDFLAGS += -fprofile-arcs -ftest-coverage
endif
-ifdef SYNC_BENCH_DEBUG
+ifdef NETWORK_TRACE
CFLAGS += -DCLOUDSYNC_NETWORK_TRACE
endif
@@ -255,8 +276,20 @@ $(BUILD_TEST)/sqlite3.o: $(SQLITE_DIR)/sqlite3.c
$(BUILD_TEST)/%.o: %.c
$(CC) $(T_CFLAGS) -c $< -o $@
+# Network-enabled object files (networking ON, for network-unittest)
+$(BUILD_NETTEST):
+ mkdir -p $(BUILD_NETTEST)
+$(BUILD_NETTEST)/sqlite3.o: $(SQLITE_DIR)/sqlite3.c | $(BUILD_NETTEST)
+ $(CC) $(CFLAGS) -DSQLITE_DQS=0 -DSQLITE_CORE -c $< -o $@
+$(BUILD_NETTEST)/%.o: %.c | $(BUILD_NETTEST)
+ $(CC) $(NT_CFLAGS) -c $< -o $@
+
# Run code coverage (--css-file $(CUSTOM_CSS))
-test: $(TARGET) $(TEST_TARGET) unittest e2e
+# dist/network_unit is listed before `unittest` so it is built during the file-build
+# phase: on Android the host `make test` aborts when `unittest` runs the cross-built
+# dist/unit, and network_unit (not in TEST_TARGET) would otherwise never be built
+# before `make test -n` captures the on-emulator command script.
+test: $(TARGET) $(TEST_TARGET) $(DIST_DIR)/network_unit$(EXE) unittest network-unittest e2e
set -e; $(SQLITE3) ":memory:" -cmd ".bail on" ".load ./$<" "SELECT cloudsync_version();"
ifneq ($(COVERAGE),false)
mkdir -p $(COV_DIR)
@@ -268,6 +301,17 @@ endif
unittest: $(TARGET) $(DIST_DIR)/unit$(EXE)
@./$(DIST_DIR)/unit$(EXE)
+# Network-enabled unit test binary. Link it via a file rule (like dist/unit), not in
+# the run recipe below: on Android `make test` runs binaries on the emulator from a
+# script generated by `make test -n`, so a link command inside the recipe would be
+# emitted and the cross-compiler invoked on-device (it isn't there → exit 127).
+$(DIST_DIR)/network_unit$(EXE): $(CURL_LIB) $(NT_OBJ)
+ $(CC) $(NT_OBJ) -o $@ $(NT_LDFLAGS)
+
+# Run the network-layer unit tests (networking compiled in, no server)
+network-unittest: $(DIST_DIR)/network_unit$(EXE)
+ @./$(DIST_DIR)/network_unit$(EXE)
+
# Run end-to-end integration tests
e2e: $(TARGET) $(DIST_DIR)/integration$(EXE)
@if [ -f .env ]; then \
@@ -292,7 +336,7 @@ sync-bench: $(TARGET) $(DIST_DIR)/sync_bench$(EXE)
./$(DIST_DIR)/sync_bench$(EXE)
sync-bench-debug:
- $(MAKE) SYNC_BENCH_DEBUG=1 sync-bench
+ $(MAKE) NETWORK_TRACE=1 sync-bench
OPENSSL_TARBALL = $(OPENSSL_DIR)/$(OPENSSL_VERSION).tar.gz
@@ -326,7 +370,7 @@ else
unzip $(CURL_DIR)/src/curl.zip -d $(CURL_DIR)/src/.
endif
- cd $(CURL_SRC) && ./configure \
+ cd $(CURL_SRC) && $(CURL_CONFIG_ENV) ./configure \
--without-libpsl \
--disable-alt-svc \
--disable-ares \
diff --git a/PERFORMANCE.md b/PERFORMANCE.md
index 236ab95a..c865d13b 100644
--- a/PERFORMANCE.md
+++ b/PERFORMANCE.md
@@ -41,7 +41,9 @@ SELECT ... FROM cloudsync_changes WHERE db_version >
Each metadata table has an **index on `db_version`**, so payload generation scales primarily with the number of new changes, plus a small per-synced-table overhead to construct the `cloudsync_changes` query. It does not diff the full dataset. In SQLite, each changed column also performs a primary-key lookup in the base table to retrieve the current value.
-The resulting payload is LZ4-compressed before transmission.
+The legacy `cloudsync_payload_encode()` API builds one monolithic LZ4-compressed payload before transmission. For large deltas, `cloudsync_payload_chunks()` can be used instead: it streams a sequence of payload chunks bounded by the `payload_max_chunk_size` setting (default 5 MB, minimum 256 KB, maximum 32 MB). If a single encoded BLOB/TEXT value is larger than the chunk budget, the value is split into transparent v3 fragments and reassembled by `cloudsync_payload_apply()` on the receiver.
+
+For legacy `/check` callers that still need one monolithic payload, `cloudsync_payload_blob_checked()` performs an internal size estimate before encoding. Successful calls scan the selected change window twice (estimate, then encode), so they trade extra I/O for avoiding unsafe monolithic payload allocation when the estimate exceeds the configured limit.
#### Pull: Payload Application
@@ -69,7 +71,7 @@ When the application runs sync off the main thread, perceived latency depends on
- **Sync interval**: How often the app triggers a push/pull cycle. More frequent syncs mean smaller deltas (smaller D) and faster individual sync operations, at the cost of more network round-trips.
- **Network latency**: The round-trip time to the sync server. LZ4 compression reduces payload size, but latency is dominated by the network hop itself for small deltas.
-- **Payload size**: Proportional to D x average column value size. Large BLOBs or TEXT values will increase transfer time linearly.
+- **Payload size**: Proportional to D x average column value size. Large BLOBs or TEXT values will increase transfer time linearly. Use `cloudsync_payload_chunks()` when transport payloads may be large; it limits each generated transport payload but does not change the size of the final database value.
The extension does not impose a sync schedule -- the application controls when and how often to sync. A typical pattern is to sync on a timer (e.g., every 5-30 seconds) or on specific events (app foreground, user action).
@@ -118,7 +120,11 @@ Normal application reads are not directly instrumented by the extension. No trig
When a new device syncs for the first time (`db_version = 0`), the push payload contains the **entire dataset**: every column of every row across all synced tables. The payload size is proportional to `N * C` (total rows times columns).
-The payload is built entirely in memory, starting with a 512 KB buffer (`CLOUDSYNC_PAYLOAD_MINBUF_SIZE` in `src/cloudsync.c`) and growing via `realloc` as needed. Peak memory usage is at least the full uncompressed payload size and can be higher during compression. For a database with 1 million rows and 10 columns of average 50 bytes each, the uncompressed payload could reach ~500 MB before LZ4 compression.
+With the legacy `cloudsync_payload_encode()` API, the payload is built entirely in memory, starting with a 512 KB buffer (`CLOUDSYNC_PAYLOAD_MINBUF_SIZE` in `src/cloudsync.c`) and growing via `realloc` as needed. Peak memory usage is at least the full uncompressed payload size and can be higher during compression. For a database with 1 million rows and 10 columns of average 50 bytes each, the uncompressed payload could reach ~500 MB before LZ4 compression.
+
+For large initial syncs, prefer `cloudsync_payload_chunks()`. It keeps each generated transport payload bounded by `payload_max_chunk_size` and can fragment a single oversized BLOB/TEXT column across multiple v3 fragment payloads. This prevents the transport payload itself from growing without bound and avoids constructing a monolithic v2 payload during v3 apply.
+
+Important limitation: chunking does **not** make a single database cell streamable all the way into the storage engine. When the last fragment of a very large BLOB/TEXT value arrives, the receiver must still materialize the completed value once in order to bind/store it in the destination database. Size `payload_max_chunk_size` for transport safety, but size application memory limits for the largest individual value you allow.
Subsequent syncs are incremental (proportional to D, changes since the last sync), so the first sync is the expensive one. Applications with large datasets should plan for this -- for example, by seeding new devices from a database snapshot rather than syncing from scratch.
@@ -185,6 +191,7 @@ CloudSync: sync_time ~ O(D) -- grows with changes since last sy
2. **`db_version` index**: Enables efficient range scans for delta extraction.
3. **Deferred batch merge**: Column changes for the same primary key are accumulated and flushed as a single SQL statement.
4. **Prepared statement caching**: Merge statements are compiled once and reused across rows.
-5. **LZ4 compression**: Reduces payload size for network transfer.
-6. **Per-column tracking**: Only changed columns are included in the sync payload, not entire rows.
-7. **Early exit on stale data**: The CLS algorithm skips rows where the incoming causal length is lower than the local one, avoiding unnecessary column-level comparisons.
+5. **Chunked payload generation**: `cloudsync_payload_chunks()` bounds transport payload size and handles oversized single values with transparent v3 fragments.
+6. **LZ4 compression**: Reduces payload size for network transfer.
+7. **Per-column tracking**: Only changed columns are included in the sync payload, not entire rows.
+8. **Early exit on stale data**: The CLS algorithm skips rows where the incoming causal length is lower than the local one, avoiding unnecessary column-level comparisons.
diff --git a/README.md b/README.md
index 6bdaa308..87ad4103 100644
--- a/README.md
+++ b/README.md
@@ -219,6 +219,7 @@ See the full guide: **[Row-Level Security Documentation](./docs/row-level-securi
## Documentation
- **[API Reference](./API.md)**: all functions, parameters, and examples
+- **[Performance & Overhead](./PERFORMANCE.md)**: sync cost model, payload chunking, and large-value memory notes
- **[Installation Guide](./docs/installation.md)**: platform-specific setup (Swift, Android, Expo, React Native, Flutter, WASM)
- **[Block-Level LWW Guide](./docs/block-lww.md)**: line-level text merge for markdown and documents
- **[Row-Level Security Guide](./docs/row-level-security.md)**: multi-tenant access control with server-enforced policies
diff --git a/docker/postgresql/Dockerfile b/docker/postgresql/Dockerfile
index b86e6dc0..e18b71b0 100644
--- a/docker/postgresql/Dockerfile
+++ b/docker/postgresql/Dockerfile
@@ -6,6 +6,7 @@ FROM postgres:${POSTGRES_TAG}
# and install the matching server-dev package
RUN apt-get update && apt-get install -y \
build-essential \
+ postgresql-contrib-${PG_MAJOR} \
postgresql-server-dev-${PG_MAJOR} \
git \
make \
diff --git a/docker/postgresql/Dockerfile.debug b/docker/postgresql/Dockerfile.debug
index 3f77c04a..c5549366 100644
--- a/docker/postgresql/Dockerfile.debug
+++ b/docker/postgresql/Dockerfile.debug
@@ -44,7 +44,9 @@ RUN set -eux; \
cd /usr/src/postgresql-17; \
./configure --enable-debug --enable-cassert --without-icu CFLAGS="-O0 -g3 -fno-omit-frame-pointer"; \
make -j"$(nproc)"; \
- make install
+ make install; \
+ make -C contrib/dblink -j"$(nproc)"; \
+ make -C contrib/dblink install
ENV PATH="/usr/local/pgsql/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/pgsql/lib:${LD_LIBRARY_PATH}"
diff --git a/docker/postgresql/Dockerfile.debug-no-optimization b/docker/postgresql/Dockerfile.debug-no-optimization
index 3f77c04a..c5549366 100644
--- a/docker/postgresql/Dockerfile.debug-no-optimization
+++ b/docker/postgresql/Dockerfile.debug-no-optimization
@@ -44,7 +44,9 @@ RUN set -eux; \
cd /usr/src/postgresql-17; \
./configure --enable-debug --enable-cassert --without-icu CFLAGS="-O0 -g3 -fno-omit-frame-pointer"; \
make -j"$(nproc)"; \
- make install
+ make install; \
+ make -C contrib/dblink -j"$(nproc)"; \
+ make -C contrib/dblink install
ENV PATH="/usr/local/pgsql/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/pgsql/lib:${LD_LIBRARY_PATH}"
diff --git a/docs/postgresql/reference/jwt-claims.md b/docs/postgresql/reference/jwt-claims.md
index 09635fd8..219a699d 100644
--- a/docs/postgresql/reference/jwt-claims.md
+++ b/docs/postgresql/reference/jwt-claims.md
@@ -133,6 +133,7 @@ GRANT SELECT, INSERT, UPDATE, DELETE ON
cloudsync_table_settings,
cloudsync_site_id,
cloudsync_schema_versions,
+ cloudsync_payload_fragments,
app_schema_version
TO rls_role;
diff --git a/docs/postgresql/reference/rls.md b/docs/postgresql/reference/rls.md
index 26e4bd5d..8b6586d2 100644
--- a/docs/postgresql/reference/rls.md
+++ b/docs/postgresql/reference/rls.md
@@ -182,7 +182,7 @@ When using Supabase:
**Symptom**: `cloudsync_payload_apply` returns a non-zero column-change count, but `SELECT` on the target table shows no new rows. No error is raised to the caller.
-**Cause**: The calling role is missing a grant on one of CloudSync's internal objects — the per-table shadow (`_cloudsync`), a metadata table (`cloudsync_settings`, `cloudsync_site_id`, `cloudsync_table_settings`, `cloudsync_schema_versions`, `app_schema_version`), the `cloudsync_changes` view, or the `cloudsync_site_id_id_seq` sequence. The per-PK savepoint rolls the write back, but `cloudsync_payload_apply` still returns the number of column changes it processed.
+**Cause**: The calling role is missing a grant on one of CloudSync's internal objects — the per-table shadow (`_cloudsync`), a metadata table (`cloudsync_settings`, `cloudsync_site_id`, `cloudsync_table_settings`, `cloudsync_schema_versions`, `cloudsync_payload_fragments`, `app_schema_version`), the `cloudsync_changes` view, or the `cloudsync_site_id_id_seq` sequence. The per-PK savepoint rolls the write back, but `cloudsync_payload_apply` still returns the number of column changes it processed.
**Solution**: Apply the full grant set from [JWT Claims → Required Grants](./jwt-claims.md#required-grants). To pinpoint which object is missing, re-run the apply as a superuser or raise log verbosity and inspect the server log for `permission denied` entries preceded by the `cloudsync_payload_apply` call.
diff --git a/examples/sport-tracker-app/src/SQLiteSync.ts b/examples/sport-tracker-app/src/SQLiteSync.ts
index 0b639d6d..85304c3a 100644
--- a/examples/sport-tracker-app/src/SQLiteSync.ts
+++ b/examples/sport-tracker-app/src/SQLiteSync.ts
@@ -51,9 +51,9 @@ export class SQLiteSync {
*
* Sync happens in these steps:
* 1. Send local changes to the server (`cloudsync_network_send_changes()`).
- * 2. Check for changes from the server (`cloudsync_network_check_changes()`).
+ * 2. Receive changes from the server (`cloudsync_network_receive_changes()`).
* 3. Waits a moment for the server to prepare changes if any.
- * 4. Check again for changes from the server and apply them to the local database (`cloudsync_network_check_changes()`).
+ * 4. Receive changes again from the server and apply them to the local database (`cloudsync_network_receive_changes()`).
*/
async sync() {
if (!this.db) {
diff --git a/examples/to-do-app/components/SyncContext.js b/examples/to-do-app/components/SyncContext.js
index 7b076efb..c381c02e 100644
--- a/examples/to-do-app/components/SyncContext.js
+++ b/examples/to-do-app/components/SyncContext.js
@@ -51,14 +51,14 @@ export const SyncProvider = ({ children }) => {
try {
// Use a timeout for the database query to prevent hanging
- const queryPromise = db.execute('SELECT cloudsync_network_check_changes();');
+ const queryPromise = db.execute('SELECT cloudsync_network_receive_changes();');
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Query timeout')), 5000)
);
const result = await Promise.race([queryPromise, timeoutPromise]);
- const raw = result.rows?.[0]?.['cloudsync_network_check_changes()'];
+ const raw = result.rows?.[0]?.['cloudsync_network_receive_changes()'];
if (raw) {
const { receive } = JSON.parse(raw);
if (receive.rows > 0) {
diff --git a/src/cloudsync.c b/src/cloudsync.c
index 908e9c17..6cc1f01e 100644
--- a/src/cloudsync.c
+++ b/src/cloudsync.c
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include "cloudsync.h"
#include "lz4.h"
@@ -55,8 +56,12 @@
#define CLOUDSYNC_PAYLOAD_VERSION_ORIGNAL 1
#define CLOUDSYNC_PAYLOAD_VERSION_1 CLOUDSYNC_PAYLOAD_VERSION_ORIGNAL
#define CLOUDSYNC_PAYLOAD_VERSION_2 2
+#define CLOUDSYNC_PAYLOAD_VERSION_3 3
#define CLOUDSYNC_PAYLOAD_VERSION_LATEST CLOUDSYNC_PAYLOAD_VERSION_2
#define CLOUDSYNC_PAYLOAD_MIN_VERSION_WITH_CHECKSUM CLOUDSYNC_PAYLOAD_VERSION_2
+#define CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX "__cloudsync_frag_v1__:"
+#define CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS (24*60*60)
+#define CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL (60)
#ifndef MAX
#define MAX(a, b) (((a)>(b))?(a):(b))
@@ -161,7 +166,11 @@ struct cloudsync_context {
int64_t pending_db_version;
// used to set an order inside each transaction
int seq;
-
+
+ // wall-clock (time()) of the last stale v3-fragment GC; throttles the GC so
+ // it does not run a full table scan on every applied fragment (0 = never run)
+ int64_t last_fragment_cleanup;
+
// optional schema_name to be set in the cloudsync_table_context
char *current_schema;
@@ -177,6 +186,12 @@ struct cloudsync_context {
// deferred column-batch merge (active during payload_apply)
merge_pending_batch *pending_batch;
+
+ // last (db_version, seq) successfully applied during the current
+ // cloudsync_payload_apply call; used to resolve the
+ // CLOUDSYNC_CHECKPOINT_LAST_APPLIED receive-checkpoint mode (-1 = none yet).
+ int64_t apply_last_db_version;
+ int64_t apply_last_seq;
};
struct cloudsync_table_context {
@@ -237,6 +252,7 @@ struct cloudsync_payload_context {
size_t bused;
uint64_t nrows;
uint16_t ncols;
+ uint8_t version;
};
#ifdef _MSC_VER
@@ -3029,7 +3045,21 @@ size_t cloudsync_payload_context_size (size_t *header_size) {
return sizeof(cloudsync_payload_context);
}
-void cloudsync_payload_header_init (cloudsync_payload_header *header, uint32_t expanded_size, uint16_t ncols, uint32_t nrows, uint64_t hash) {
+// Frees a payload context: first its encode buffer (when one is set), then the
+// context itself. A NULL payload is accepted and ignored.
+void cloudsync_payload_context_free (cloudsync_payload_context *payload) {
+    if (payload != NULL) {
+        if (payload->buffer != NULL) {
+            cloudsync_memory_free(payload->buffer);
+        }
+        cloudsync_memory_free(payload);
+    }
+}
+
+// Returns the row counter stored in the payload context, or 0 for a NULL context.
+uint64_t cloudsync_payload_context_nrows (cloudsync_payload_context *payload) {
+    if (payload == NULL) return 0;
+    return payload->nrows;
+}
+
+// Returns the number of buffer bytes currently in use (bused), or 0 for a NULL context.
+size_t cloudsync_payload_context_bused (cloudsync_payload_context *payload) {
+    if (payload == NULL) return 0;
+    return payload->bused;
+}
+
+void cloudsync_payload_header_init (cloudsync_payload_header *header, uint8_t version, uint32_t expanded_size, uint16_t ncols, uint32_t nrows, uint64_t hash) {
memset(header, 0, sizeof(cloudsync_payload_header));
assert(sizeof(cloudsync_payload_header)==32);
@@ -3037,7 +3067,7 @@ void cloudsync_payload_header_init (cloudsync_payload_header *header, uint32_t e
sscanf(CLOUDSYNC_VERSION, "%d.%d.%d", &major, &minor, &patch);
header->signature = htonl(CLOUDSYNC_PAYLOAD_SIGNATURE);
- header->version = CLOUDSYNC_PAYLOAD_VERSION_2;
+ header->version = version;
header->libversion[0] = (uint8_t)major;
header->libversion[1] = (uint8_t)minor;
header->libversion[2] = (uint8_t)patch;
@@ -3073,6 +3103,320 @@ int cloudsync_payload_encode_step (cloudsync_payload_context *payload, cloudsync
return DBRES_OK;
}
+// Appends one row made of nfields already-encoded fields to the payload buffer.
+// fields[i] points to field_sizes[i] bytes that are copied back-to-back.
+// Returns false (with an error recorded on data) when the summed field sizes
+// would overflow size_t or when the payload buffer cannot hold the row.
+static bool cloudsync_payload_append_raw (cloudsync_payload_context *payload, cloudsync_context *data, const char **fields, const size_t *field_sizes, int nfields, uint8_t version) {
+    // total row size, guarding the running sum against size_t wrap-around
+    size_t row_size = 0;
+    for (int f = 0; f < nfields; f++) {
+        size_t fsize = field_sizes[f];
+        if (SIZE_MAX - row_size < fsize) {
+            cloudsync_set_error(data, CLOUDSYNC_ERRCODE_ROW_TOO_LARGE "cloudsync payload raw row too large", DBRES_NOMEM);
+            return false;
+        }
+        row_size += fsize;
+    }
+
+    if (!cloudsync_payload_encode_check(payload, row_size)) {
+        cloudsync_set_error(data, "Not enough memory to resize payload internal buffer", DBRES_NOMEM);
+        return false;
+    }
+
+    // the first appended row fixes the column count and the payload version
+    if (payload->nrows == 0) {
+        payload->version = version;
+        payload->ncols = (uint16_t)nfields;
+    }
+
+    // the write cursor is taken only after the capacity check above
+    char *cursor = payload->buffer + payload->bused;
+    for (int f = 0; f < nfields; f++) {
+        memcpy(cursor, fields[f], field_sizes[f]);
+        cursor += field_sizes[f];
+    }
+
+    payload->nrows += 1;
+    payload->bused += row_size;
+    return true;
+}
+
+// Reads the CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE setting and returns it clamped to
+// [CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE, CLOUDSYNC_PAYLOAD_CHUNK_MAX_SIZE].
+// A non-positive setting value is replaced by CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE first.
+int cloudsync_payload_max_chunk_size (cloudsync_context *data) {
+    int64_t size = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE);
+    if (size <= 0) size = CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE;
+
+    // lower bound first, then upper bound
+    size = (size < CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE) ? CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE : size;
+    size = (size > CLOUDSYNC_PAYLOAD_CHUNK_MAX_SIZE) ? CLOUDSYNC_PAYLOAD_CHUNK_MAX_SIZE : size;
+    return (int)size;
+}
+
+// Returns the target size for one fragment: the maximum chunk size minus the
+// payload header and CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN, never less than 1024.
+int cloudsync_payload_fragment_target_size (cloudsync_context *data) {
+    int target = cloudsync_payload_max_chunk_size(data) - (int)sizeof(cloudsync_payload_header) - CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN;
+    return (target < 1024) ? 1024 : target;
+}
+
+// Returns the number of characters needed to print value in base 10,
+// including the leading '-' for negative numbers (no terminator).
+static size_t cloudsync_payload_decimal_len_i64 (int64_t value) {
+    size_t digits = 1;      // every number has at least one digit
+    uint64_t magnitude;
+
+    if (value < 0) {
+        digits++;           // room for the '-' sign
+        // negate via (value + 1) so INT64_MIN does not overflow
+        magnitude = (uint64_t)(-(value + 1)) + 1u;
+    } else {
+        magnitude = (uint64_t)value;
+    }
+
+    for (; magnitude >= 10u; magnitude /= 10u) digits++;
+    return digits;
+}
+
+// Adds value to *acc unless the sum would exceed SIZE_MAX.
+// Returns true on success; on overflow returns false and leaves *acc untouched.
+static bool cloudsync_payload_size_add (size_t *acc, size_t value) {
+    size_t room = SIZE_MAX - *acc;
+    if (room < value) return false;
+    *acc = *acc + value;
+    return true;
+}
+
+// Returns how many fragments of at most target_size bytes are needed to carry
+// total_size bytes (ceiling division). Returns 0 when either argument is
+// non-positive or when the count does not fit in an int.
+int cloudsync_payload_fragment_count (int64_t total_size, int target_size) {
+    if (total_size <= 0) return 0;
+    if (target_size <= 0) return 0;
+
+    uint64_t bytes = (uint64_t)total_size;
+    uint64_t per_part = (uint64_t)target_size;
+    uint64_t parts = bytes / per_part;
+    if ((bytes % per_part) != 0) parts++;   // partial trailing fragment
+
+    return (parts == 0 || parts > INT_MAX) ? 0 : (int)parts;
+}
+
+// Computes how many bytes of value data can be carried by a single fragment row so
+// that payload header + the eight fixed fields + the encoded fragment blob stay within
+// cloudsync_payload_max_chunk_size(data).
+// Only the lengths of pk and site_id are used (the pointers are ignored). A negative
+// tbl_len/col_name_len is replaced by strlen() of the corresponding string when it is non-NULL.
+// Returns 0 when an argument is invalid, when a field size cannot be computed, or when
+// the fixed part alone already reaches the maximum chunk size.
+int cloudsync_payload_fragment_data_size (cloudsync_context *data,
+ const char *tbl, int tbl_len,
+ const void *pk, int pk_len,
+ const char *col_name, int col_name_len,
+ int64_t col_version, int64_t db_version,
+ const void *site_id, int site_id_len,
+ int64_t cl, int64_t seq,
+ int64_t total_size,
+ int part_index, int part_count) {
+ UNUSED_PARAMETER(pk);
+ UNUSED_PARAMETER(site_id);
+ if (tbl_len < 0 && tbl) tbl_len = (int)strlen(tbl);
+ if (col_name_len < 0 && col_name) col_name_len = (int)strlen(col_name);
+ if (tbl_len < 0 || pk_len < 0 || col_name_len < 0 || site_id_len < 0 || total_size < 0 || part_index < 0 || part_count <= 0) {
+ return 0;
+ }
+
+ // fixed = payload header + every row field except the fragment blob itself
+ size_t fixed = sizeof(cloudsync_payload_header);
+ // length of the synthetic fragment column name, mirroring the format used by
+ // cloudsync_payload_encode_fragment_step:
+ // PREFIX + 32-char value id + ':' + 16-char checksum + ':' + part_index + ':' + part_count + ':' + total_size + ':' + col_name
+ size_t frag_col_len = strlen(CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX) + 32 + 1 + 16 + 1 +
+ cloudsync_payload_decimal_len_i64(part_index) + 1 +
+ cloudsync_payload_decimal_len_i64(part_count) + 1 +
+ cloudsync_payload_decimal_len_i64(total_size) + 1 +
+ (size_t)col_name_len;
+ size_t sizes[] = {
+ pk_encode_raw_size(DBTYPE_TEXT, tbl_len),
+ pk_encode_raw_size(DBTYPE_BLOB, pk_len),
+ pk_encode_raw_size(DBTYPE_TEXT, (int64_t)frag_col_len),
+ pk_encode_raw_size(DBTYPE_INTEGER, col_version),
+ pk_encode_raw_size(DBTYPE_INTEGER, db_version),
+ pk_encode_raw_size(DBTYPE_BLOB, site_id_len),
+ pk_encode_raw_size(DBTYPE_INTEGER, cl),
+ pk_encode_raw_size(DBTYPE_INTEGER, seq)
+ };
+ // SIZE_MAX is treated as a failure marker from pk_encode_raw_size; the sum is overflow-checked
+ for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
+ if (sizes[i] == SIZE_MAX || !cloudsync_payload_size_add(&fixed, sizes[i])) return 0;
+ }
+
+ int max_size = cloudsync_payload_max_chunk_size(data);
+ if (fixed >= (size_t)max_size) return 0;
+
+ // start from all the remaining room and shrink until the *encoded* blob
+ // (data bytes plus its own type/length prefix) fits next to the fixed part
+ size_t candidate = (size_t)max_size - fixed;
+ if (candidate > INT_MAX) candidate = INT_MAX;
+ while (candidate > 0) {
+ size_t frag_size = pk_encode_raw_size(DBTYPE_BLOB, (int64_t)candidate);
+ if (frag_size == SIZE_MAX) return 0;
+ if (fixed <= (size_t)max_size && frag_size <= (size_t)max_size - fixed) return (int)candidate;
+ // reduce by the amount the row overshoots the limit (at least 1 byte)
+ size_t total = fixed + frag_size;
+ size_t over = total > (size_t)max_size ? total - (size_t)max_size : 1;
+ if (candidate <= over) return 0;
+ candidate -= over;
+ }
+ return 0;
+}
+
+// Writes into header the encoding prefix (type byte followed by the big-endian
+// length bytes) that precedes the raw bytes of a TEXT or BLOB value.
+//
+// value        value to describe
+// header       destination buffer for the prefix
+// header_cap   capacity of header in bytes
+// payload_len  out: byte length of the value (0 for non TEXT/BLOB values)
+//
+// Returns the number of prefix bytes written, 0 when value is neither TEXT nor BLOB
+// (nothing is written), or -1 on invalid arguments, a negative value length, or when
+// the prefix cannot be computed or does not fit in header_cap.
+int cloudsync_payload_encoded_value_header (dbvalue_t *value, char *header, int header_cap, int64_t *payload_len) {
+    if (!value || !header || header_cap <= 0 || !payload_len) return -1;
+
+    int type = database_value_type(value);
+    *payload_len = 0;
+    if (type != DBTYPE_TEXT && type != DBTYPE_BLOB) return 0;
+
+    int64_t len = database_value_bytes(value);
+    if (len < 0) return -1;
+    *payload_len = len;
+
+    // total = prefix + data. The prefix has at least the type byte, so total must be
+    // strictly greater than len; otherwise "prefix - 1" below would wrap around.
+    size_t total = pk_encode_raw_size(type, len);
+    if (total == SIZE_MAX || total <= (size_t)len) return -1;
+
+    size_t prefix_len = total - (size_t)len;
+    if (prefix_len > (size_t)header_cap) return -1;
+
+    // number of length bytes; more than 8 cannot describe an int64 length and
+    // would make the shift below undefined
+    size_t nbytes = prefix_len - 1;
+    if (nbytes > sizeof(uint64_t)) return -1;
+
+    // TEXT and BLOB share the same layout and differ only in the type tag
+    header[0] = (char)(uint8_t)((nbytes << 3) | (size_t)type);
+    for (size_t i = 0; i < nbytes; i++) {
+        unsigned shift = 8u * (unsigned)(nbytes - 1 - i);
+        header[1 + i] = (char)(uint8_t)(((uint64_t)len >> shift) & 0xFFu);
+    }
+    return (int)prefix_len;
+}
+
+// Computes a checksum over the encoded form of value.
+// TEXT/BLOB values are hashed as encoding prefix + raw bytes without building the
+// encoded buffer; every other type is encoded with pk_encode and hashed with pk_checksum.
+// Returns 0 for a NULL value, when the prefix cannot be produced, or when the
+// temporary encode buffer cannot be allocated.
+uint64_t cloudsync_payload_encoded_value_checksum (dbvalue_t *value) {
+    if (value == NULL) return 0;
+
+    int type = database_value_type(value);
+    if (type == DBTYPE_TEXT || type == DBTYPE_BLOB) {
+        char header[16];
+        int64_t payload_len = 0;
+        int header_len = cloudsync_payload_encoded_value_header(value, header, sizeof(header), &payload_len);
+        if (header_len <= 0) return 0;
+
+        // hash the prefix, then continue the same multiply/xor step over the value bytes
+        uint64_t hash = pk_checksum(header, (size_t)header_len);
+        const uint8_t *cur = (const uint8_t *)database_value_blob(value);
+        if (cur == NULL || payload_len <= 0) return hash;
+
+        for (int64_t left = payload_len; left > 0; --left) {
+            hash ^= *cur++;
+            hash *= 1099511628211ULL;
+        }
+        return hash;
+    }
+
+    // small encodings use the stack buffer, larger ones a temporary heap buffer
+    size_t len = pk_encode_size(&value, 1, 0, -1);
+    char stack[32];
+    char *buf = stack;
+    if (len > sizeof(stack)) buf = cloudsync_memory_alloc((uint64_t)len);
+    if (buf == NULL) return 0;
+
+    size_t bsize = len;
+    pk_encode(&value, 1, buf, false, &bsize, -1);
+    uint64_t hash = pk_checksum(buf, bsize);
+    if (buf != stack) cloudsync_memory_free(buf);
+    return hash;
+}
+
+// Folds len bytes of data into the running hash h: each byte is XOR-ed into h,
+// then h is multiplied by 1099511628211. Returns the updated hash (h itself when len is 0).
+static uint64_t cloudsync_checksum_update (uint64_t h, const void *data, size_t len) {
+    const uint8_t *byte = (const uint8_t *)data;
+    while (len-- > 0) {
+        h = (h ^ *byte++) * 1099511628211ULL;
+    }
+    return h;
+}
+
+// Folds a 64-bit integer into the running hash h one byte at a time, most
+// significant byte first, so the result does not depend on host byte order.
+static uint64_t cloudsync_checksum_update_i64 (uint64_t h, int64_t value) {
+    uint64_t bits = (uint64_t)value;
+    for (int shift = 56; shift >= 0; shift -= 8) {
+        uint8_t byte = (uint8_t)(bits >> shift);
+        h = cloudsync_checksum_update(h, &byte, 1);
+    }
+    return h;
+}
+
+// Builds the 32-hex-character identifier (plus NUL terminator) written to out for one
+// fragmented value. The id is the concatenation of two 64-bit hashes computed with
+// cloudsync_checksum_update over the same inputs (table, pk, column name, site id,
+// versions, cl, seq, value checksum, total size) fed in two different orders.
+// The exact order of the update calls defines the id: do not reorder them.
+static void cloudsync_payload_fragment_value_id (char out[33],
+ const char *tbl, int tbl_len,
+ const void *pk, int pk_len,
+ const char *col_name, int col_name_len,
+ int64_t col_version, int64_t db_version,
+ const void *site_id, int site_id_len,
+ int64_t cl, int64_t seq,
+ uint64_t value_checksum,
+ int64_t total_size) {
+ // two different seeds so h1 and h2 diverge even on identical input
+ uint64_t h1 = 14695981039346656037ULL;
+ uint64_t h2 = 1099511628211ULL;
+ // 0x1f separator byte, hashed between the variable-length fields of h1
+ const char sep = '\x1f';
+
+ // h1: tbl, pk, col_name, site_id (separator after the first three), then the integers
+ h1 = cloudsync_checksum_update(h1, tbl, (size_t)tbl_len);
+ h1 = cloudsync_checksum_update(h1, &sep, 1);
+ h1 = cloudsync_checksum_update(h1, pk, (size_t)pk_len);
+ h1 = cloudsync_checksum_update(h1, &sep, 1);
+ h1 = cloudsync_checksum_update(h1, col_name, (size_t)col_name_len);
+ h1 = cloudsync_checksum_update(h1, &sep, 1);
+ h1 = cloudsync_checksum_update(h1, site_id, (size_t)site_id_len);
+ h1 = cloudsync_checksum_update_i64(h1, col_version);
+ h1 = cloudsync_checksum_update_i64(h1, db_version);
+ h1 = cloudsync_checksum_update_i64(h1, cl);
+ h1 = cloudsync_checksum_update_i64(h1, seq);
+ h1 = cloudsync_checksum_update_i64(h1, (int64_t)value_checksum);
+ h1 = cloudsync_checksum_update_i64(h1, total_size);
+
+ // h2: same inputs in a different order, without separators
+ h2 = cloudsync_checksum_update_i64(h2, total_size);
+ h2 = cloudsync_checksum_update_i64(h2, (int64_t)value_checksum);
+ h2 = cloudsync_checksum_update(h2, site_id, (size_t)site_id_len);
+ h2 = cloudsync_checksum_update(h2, col_name, (size_t)col_name_len);
+ h2 = cloudsync_checksum_update(h2, pk, (size_t)pk_len);
+ h2 = cloudsync_checksum_update(h2, tbl, (size_t)tbl_len);
+ h2 = cloudsync_checksum_update_i64(h2, seq);
+ h2 = cloudsync_checksum_update_i64(h2, cl);
+ h2 = cloudsync_checksum_update_i64(h2, db_version);
+ h2 = cloudsync_checksum_update_i64(h2, col_version);
+
+ // 16 zero-padded hex digits per hash + terminating NUL = 33 bytes
+ snprintf(out, 33, "%016" PRIx64 "%016" PRIx64, h1, h2);
+}
+
+// Encodes one fragment of an oversized column value as a 9-field payload row and
+// appends it to payload as a CLOUDSYNC_PAYLOAD_VERSION_3 row.
+//
+// Row layout: table name (TEXT), pk (BLOB), fragment column name (TEXT), fragment
+// bytes (BLOB), col_version, db_version (INTEGER), site_id (BLOB), cl, seq (INTEGER).
+// The fragment column name is
+//   CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX value_id ":" checksum_hex ":" part_index ":" part_count ":" total_size ":" col_name
+//
+// tbl_len / col_name_len may be negative, in which case strlen() is used.
+// Returns DBRES_OK on success, DBRES_MISUSE on invalid arguments, DBRES_NOMEM when a
+// temporary buffer cannot be allocated or a field size cannot be computed, or the
+// error code recorded on data when appending the row fails.
+int cloudsync_payload_encode_fragment_step (cloudsync_payload_context *payload, cloudsync_context *data,
+                                            const char *tbl, int tbl_len,
+                                            const void *pk, int pk_len,
+                                            const char *col_name, int col_name_len,
+                                            const void *fragment, int fragment_len,
+                                            int64_t col_version, int64_t db_version,
+                                            const void *site_id, int site_id_len,
+                                            int64_t cl, int64_t seq,
+                                            uint64_t value_checksum,
+                                            int64_t total_size,
+                                            int part_index, int part_count) {
+    enum { NFIELDS = 9, INLINE_CAP = 64 };
+
+    if (!payload || !data || !tbl || !pk || !col_name || !fragment || !site_id) return DBRES_MISUSE;
+    if (tbl_len < 0) tbl_len = (int)strlen(tbl);
+    if (col_name_len < 0) col_name_len = (int)strlen(col_name);
+    if (tbl_len < 0 || pk_len < 0 || col_name_len < 0 || fragment_len <= 0 || site_id_len < 0 ||
+        total_size <= 0 || part_index < 0 || part_count <= 0 || part_index >= part_count) {
+        return DBRES_MISUSE;
+    }
+
+    char value_id[33];
+    char checksum_hex[17];
+    cloudsync_payload_fragment_value_id(value_id, tbl, tbl_len, pk, pk_len, col_name, col_name_len,
+                                        col_version, db_version, site_id, site_id_len, cl, seq,
+                                        value_checksum, total_size);
+    snprintf(checksum_hex, sizeof(checksum_hex), "%016" PRIx64, value_checksum);
+
+    char *frag_col = cloudsync_memory_mprintf("%s%s:%s:%d:%d:%" PRId64 ":%.*s",
+                                              CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX,
+                                              value_id, checksum_hex, part_index, part_count, total_size,
+                                              col_name_len, col_name);
+    if (!frag_col) return DBRES_NOMEM;
+    size_t frag_col_len = strlen(frag_col);
+
+    // every exit below goes through cleanup; rc stays DBRES_NOMEM until the row is appended
+    int rc = DBRES_NOMEM;
+    char inline_buf[NFIELDS][INLINE_CAP];   // small fields are encoded here, no heap traffic
+    char *fields[NFIELDS] = {0};
+    const char *cfields[NFIELDS];
+    size_t sizes[NFIELDS] = {
+        pk_encode_raw_size(DBTYPE_TEXT, tbl_len),
+        pk_encode_raw_size(DBTYPE_BLOB, pk_len),
+        pk_encode_raw_size(DBTYPE_TEXT, (int64_t)frag_col_len),
+        pk_encode_raw_size(DBTYPE_BLOB, fragment_len),
+        pk_encode_raw_size(DBTYPE_INTEGER, col_version),
+        pk_encode_raw_size(DBTYPE_INTEGER, db_version),
+        pk_encode_raw_size(DBTYPE_BLOB, site_id_len),
+        pk_encode_raw_size(DBTYPE_INTEGER, cl),
+        pk_encode_raw_size(DBTYPE_INTEGER, seq)
+    };
+
+    // SIZE_MAX is treated as "size could not be computed"
+    for (int i = 0; i < NFIELDS; ++i) {
+        if (sizes[i] == SIZE_MAX) goto cleanup;
+    }
+
+    for (int i = 0; i < NFIELDS; ++i) {
+        fields[i] = (sizes[i] <= (size_t)INLINE_CAP) ? inline_buf[i] : cloudsync_memory_alloc((uint64_t)sizes[i]);
+        if (!fields[i]) goto cleanup;
+    }
+
+    pk_encode_raw_text(fields[0], tbl, (size_t)tbl_len);
+    pk_encode_raw_blob(fields[1], pk, (size_t)pk_len);
+    pk_encode_raw_text(fields[2], frag_col, frag_col_len);
+    pk_encode_raw_blob(fields[3], fragment, (size_t)fragment_len);
+    pk_encode_raw_int(fields[4], col_version);
+    pk_encode_raw_int(fields[5], db_version);
+    pk_encode_raw_blob(fields[6], site_id, (size_t)site_id_len);
+    pk_encode_raw_int(fields[7], cl);
+    pk_encode_raw_int(fields[8], seq);
+
+    for (int i = 0; i < NFIELDS; ++i) cfields[i] = fields[i];
+    rc = cloudsync_payload_append_raw(payload, data, cfields, sizes, NFIELDS, CLOUDSYNC_PAYLOAD_VERSION_3)
+         ? DBRES_OK : cloudsync_errcode(data);
+
+cleanup:
+    // A slot is heap-allocated exactly when it is set and does not point at its own
+    // inline buffer. Equality comparison is well-defined for any two pointers, unlike
+    // the relational (<, >=) comparison against the stack array range.
+    for (int i = 0; i < NFIELDS; ++i) {
+        if (fields[i] && fields[i] != inline_buf[i]) cloudsync_memory_free(fields[i]);
+    }
+    cloudsync_memory_free(frag_col);
+    return rc;
+}
+
int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsync_context *data) {
DEBUG_FUNCTION("cloudsync_payload_encode_final");
@@ -3105,7 +3449,7 @@ int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsyn
if (payload->buffer) cloudsync_memory_free(payload->buffer);
payload->buffer = NULL;
payload->bsize = 0;
- cloudsync_set_error(data, "cloudsync_encode: payload too large to compress (INT_MAX limit)", DBRES_ERROR);
+ cloudsync_set_error(data, CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_encode: payload too large to compress (INT_MAX limit)", DBRES_ERROR);
return DBRES_ERROR;
}
// try to allocate buffer used for compressed data
@@ -3122,7 +3466,8 @@ int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsyn
// setup payload header
cloudsync_payload_header header = {0};
uint32_t expanded_size = (use_uncompressed_buffer) ? 0 : real_buffer_size;
- cloudsync_payload_header_init(&header, expanded_size, payload->ncols, (uint32_t)payload->nrows, data->schema_hash);
+ uint8_t version = payload->version ? payload->version : CLOUDSYNC_PAYLOAD_VERSION_LATEST;
+ cloudsync_payload_header_init(&header, version, expanded_size, payload->ncols, (uint32_t)payload->nrows, data->schema_hash);
// if compression fails or if compressed size is bigger than original buffer, then use the uncompressed buffer
if (use_uncompressed_buffer) {
@@ -3208,9 +3553,487 @@ static int cloudsync_payload_decode_callback (void *xdata, int index, int type,
return rc;
}
+typedef struct { // one decoded v3 payload row, filled column by column by cloudsync_payload_fragment_decode_callback
+ const char *tbl; // CLOUDSYNC_PK_INDEX_TBL (TEXT): the callback's pval pointer, stored without copying
+ int64_t tbl_len; // length: the ival delivered together with tbl
+ const void *pk; // CLOUDSYNC_PK_INDEX_PK (BLOB): stored without copying
+ int64_t pk_len; // length: the ival delivered together with pk
+ const char *col_name; // CLOUDSYNC_PK_INDEX_COLNAME (TEXT): parsed later by cloudsync_payload_fragment_parse_colname
+ int64_t col_name_len; // length: the ival delivered together with col_name
+ const void *col_value; // CLOUDSYNC_PK_INDEX_COLVALUE (BLOB): the bytes carried by this fragment row
+ int64_t col_value_len; // length: the ival delivered together with col_value
+ int64_t col_version; // CLOUDSYNC_PK_INDEX_COLVERSION (INTEGER)
+ int64_t db_version; // CLOUDSYNC_PK_INDEX_DBVERSION (INTEGER)
+ const void *site_id; // CLOUDSYNC_PK_INDEX_SITEID (BLOB): stored without copying
+ int64_t site_id_len; // length: the ival delivered together with site_id
+ int64_t cl; // CLOUDSYNC_PK_INDEX_CL (INTEGER)
+ int64_t seq; // CLOUDSYNC_PK_INDEX_SEQ (INTEGER)
+} cloudsync_payload_fragment_row;
+
+static int cloudsync_payload_fragment_decode_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) { // pk_decode callback: records column `index` of a v3 row into the cloudsync_payload_fragment_row passed as xdata
+ UNUSED_PARAMETER(dval); // no column handled below reads dval
+ cloudsync_payload_fragment_row *row = (cloudsync_payload_fragment_row *)xdata;
+ switch (index) { // every case rejects an unexpected value type with DBRES_ERROR
+ case CLOUDSYNC_PK_INDEX_TBL:
+ if (type != DBTYPE_TEXT) return DBRES_ERROR;
+ row->tbl = pval; row->tbl_len = ival; // pointer is kept as-is, not copied
+ break;
+ case CLOUDSYNC_PK_INDEX_PK:
+ if (type != DBTYPE_BLOB) return DBRES_ERROR;
+ row->pk = pval; row->pk_len = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_COLNAME:
+ if (type != DBTYPE_TEXT) return DBRES_ERROR;
+ row->col_name = pval; row->col_name_len = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_COLVALUE:
+ if (type != DBTYPE_BLOB) return DBRES_ERROR; // only a BLOB is accepted as the value of a v3 row
+ row->col_value = pval; row->col_value_len = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_COLVERSION:
+ if (type != DBTYPE_INTEGER) return DBRES_ERROR;
+ row->col_version = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_DBVERSION:
+ if (type != DBTYPE_INTEGER) return DBRES_ERROR;
+ row->db_version = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_SITEID:
+ if (type != DBTYPE_BLOB) return DBRES_ERROR;
+ row->site_id = pval; row->site_id_len = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_CL:
+ if (type != DBTYPE_INTEGER) return DBRES_ERROR;
+ row->cl = ival;
+ break;
+ case CLOUDSYNC_PK_INDEX_SEQ:
+ if (type != DBTYPE_INTEGER) return DBRES_ERROR;
+ row->seq = ival;
+ break;
+ } // an index without a case is ignored and still reports DBRES_OK
+ return DBRES_OK;
+}
+
+static bool cloudsync_payload_is_hex (const char *value, size_t len) { // true when each of the first len bytes is in [0-9a-fA-F]; vacuously true for len == 0
+ for (size_t i = 0; i < len; ++i) {
+ char c = value[i];
+ if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) return false; // first non-hex byte ends the scan
+ }
+ return true;
+}
+
+static bool cloudsync_payload_parse_u64_segment (const char *start, const char *end, uint64_t max_value, uint64_t *out, const char **next) { // parses "<decimal digits>:" inside [start, end); on success stores the number in *out and the position after ':' in *next
+ if (!start || !end || start >= end) return false; // empty or missing range
+ uint64_t value = 0;
+ const char *p = start;
+ while (p < end && *p >= '0' && *p <= '9') {
+ uint64_t digit = (uint64_t)(*p - '0');
+ if (value > (max_value - digit) / 10u) return false; // same as value*10 + digit > max_value, tested without overflowing (assumes max_value >= 9)
+ value = value * 10u + digit;
+ p++;
+ }
+ if (p == start || p >= end || *p != ':') return false; // needs at least one digit and a terminating ':' inside the range
+ *out = value; // outputs are written only on success
+ *next = p + 1;
+ return true;
+}
+
+static bool cloudsync_payload_fragment_parse_colname (const char *col_name, int64_t col_name_len, // parses CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX + "<32 hex value_id>:<16 hex checksum>:<part_index>:<part_count>:<total_size>:<base_col>"
+ char *value_id, size_t value_id_len, // out: NUL-terminated value id; value_id_len is the buffer capacity
+ char *checksum_hex, size_t checksum_hex_len, // out: NUL-terminated checksum text; checksum_hex_len is the buffer capacity
+ int *part_index, int *part_count, // out: decimal fields, each limited to INT_MAX
+ int64_t *total_size, // out: decimal field limited to INT64_MAX
+ const char **base_col, int64_t *base_col_len) { // out: remainder of col_name after the last parsed ':' (points into col_name, not copied)
+ size_t prefix_len = strlen(CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX);
+ if (!col_name || col_name_len <= (int64_t)prefix_len) return false; // must contain something after the prefix
+ if (strncmp(col_name, CLOUDSYNC_PAYLOAD_FRAGMENT_PREFIX, prefix_len) != 0) return false;
+
+ const char *p = col_name + prefix_len;
+ const char *end = col_name + col_name_len; // col_name is length-delimited; it is never read past end
+ const char *sep = memchr(p, ':', (size_t)(end - p));
+ if (!sep || (size_t)(sep - p) + 1 > value_id_len) return false; // +1 leaves room for the terminating NUL
+ if ((sep - p) != 32 || !cloudsync_payload_is_hex(p, (size_t)(sep - p))) return false; // value id is exactly 32 hex characters
+ memcpy(value_id, p, (size_t)(sep - p));
+ value_id[sep - p] = 0;
+
+ p = sep + 1;
+ sep = memchr(p, ':', (size_t)(end - p));
+ if (!sep || (size_t)(sep - p) + 1 > checksum_hex_len) return false;
+ if ((sep - p) != 16 || !cloudsync_payload_is_hex(p, (size_t)(sep - p))) return false; // checksum is exactly 16 hex characters
+ memcpy(checksum_hex, p, (size_t)(sep - p));
+ checksum_hex[sep - p] = 0;
+
+ const char *next = NULL;
+ uint64_t parsed = 0;
+ if (!cloudsync_payload_parse_u64_segment(sep + 1, end, INT_MAX, &parsed, &next)) return false;
+ *part_index = (int)parsed; // cast is safe: the parser capped the value at INT_MAX
+
+ if (!cloudsync_payload_parse_u64_segment(next, end, INT_MAX, &parsed, &next)) return false;
+ *part_count = (int)parsed;
+
+ if (!cloudsync_payload_parse_u64_segment(next, end, INT64_MAX, &parsed, &next)) return false;
+ *total_size = (int64_t)parsed;
+
+ *base_col = next;
+ *base_col_len = end - *base_col;
+ return (*part_count > 0 && *part_index < *part_count && *base_col_len > 0); // out-params may already be written when this returns false
+}
+
+typedef struct { // xdata handed to cloudsync_payload_bind_param_callback
+ dbvm_t *vm; // statement that receives the bound value
+ int param_index; // parameter position the value is bound to
+} cloudsync_payload_bind_param_context;
+
+static int cloudsync_payload_bind_param_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval) { // pk_decode callback: binds the decoded value to ctx->param_index of ctx->vm and returns the bind result
+ UNUSED_PARAMETER(index); // the target position comes from the context, not from the decoded column index
+ cloudsync_payload_bind_param_context *ctx = (cloudsync_payload_bind_param_context *)xdata;
+ switch (type) {
+ case DBTYPE_INTEGER: return databasevm_bind_int(ctx->vm, ctx->param_index, ival);
+ case DBTYPE_FLOAT: return databasevm_bind_double(ctx->vm, ctx->param_index, dval);
+ case DBTYPE_NULL: return databasevm_bind_null(ctx->vm, ctx->param_index);
+ case DBTYPE_TEXT: return databasevm_bind_text(ctx->vm, ctx->param_index, pval, (int)ival); // ival is passed as the text length, narrowed to int
+ case DBTYPE_BLOB: return databasevm_bind_blob(ctx->vm, ctx->param_index, pval, (uint64_t)ival); // ival is passed as the blob length
+ }
+ return DBRES_MISUSE; // a type without a case above
+}
+
+static int cloudsync_payload_fragments_cleanup_stale (cloudsync_context *data) { // runs SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE with a cutoff timestamp; DBRES_OK when throttled or when the statement completes
+ // Stale-fragment GC is pure maintenance (it removes incomplete fragment groups
+ // older than CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS), so it has no correctness
+ // deadline. It runs a full GROUP BY scan of the fragments table; calling it on
+ // every applied fragment would be O(n^2) for a heavily-fragmented value, since
+ // each fragment arrives as its own apply call. Throttle it to at most once per
+ // CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL per connection.
+ int64_t now = (int64_t)time(NULL);
+ if (data->last_fragment_cleanup != 0 && // 0 bypasses the throttle (NOTE(review): presumably the initial value, confirm)
+ now - data->last_fragment_cleanup < CLOUDSYNC_PAYLOAD_FRAGMENT_CLEANUP_MIN_INTERVAL) {
+ return DBRES_OK;
+ }
+ data->last_fragment_cleanup = now; // stamped before the statement runs, so a failed attempt is throttled as well
+
+ dbvm_t *vm = NULL;
+ int rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE, &vm, 0);
+ if (rc != DBRES_OK) return rc;
+ int64_t cutoff = now - CLOUDSYNC_PAYLOAD_FRAGMENT_STALE_SECONDS; // bound as parameter 1 of the cleanup statement
+ rc = databasevm_bind_int(vm, 1, cutoff);
+ if (rc == DBRES_OK) rc = databasevm_step(vm);
+ databasevm_finalize(vm); // finalized on both the success and the failure path
+ return (rc == DBRES_DONE) ? DBRES_OK : rc; // DBRES_DONE is the success result of the step
+}
+
+static int cloudsync_payload_apply_single_decoded_row (cloudsync_context *data, // applies one change through SQL_CHANGES_INSERT_ROW; opens its own savepoint when no transaction is active
+ const char *tbl, size_t tbl_len, // bound as parameter 1 (text)
+ const char *pk, size_t pk_len, // bound as parameter 2 (blob)
+ const char *col_name, size_t col_name_len, // bound as parameter 3 (text)
+ const char *encoded_value, size_t encoded_value_len, // bound as parameter 4, either raw or decoded (see below)
+ int64_t col_version, int64_t db_version, // bound as parameters 5 and 6
+ const char *site_id, size_t site_id_len, // bound as parameter 7 (blob)
+ int64_t cl, int64_t seq, // bound as parameters 8 and 9
+ int *pnrows) { // optional: incremented by 1 on success
+ int rc = DBRES_OK;
+ dbvm_t *vm = NULL;
+ bool in_savepoint = false; // true only while a savepoint opened here is still pending
+ merge_pending_batch batch = {0};
+
+ rc = databasevm_prepare(data, SQL_CHANGES_INSERT_ROW, &vm, 0);
+ if (rc != DBRES_OK) return cloudsync_set_error(data, "Error on cloudsync_payload_apply: error while compiling SQL statement", rc);
+
+ rc = databasevm_bind_text(vm, 1, tbl, (int)tbl_len);
+ if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 2, pk, (uint64_t)pk_len);
+ if (rc == DBRES_OK) rc = databasevm_bind_text(vm, 3, col_name, (int)col_name_len);
+ if (rc == DBRES_OK) {
+ if (data->skip_decode_idx == CLOUDSYNC_PK_INDEX_COLVALUE) { // bind the still-encoded bytes as a blob instead of decoding them
+ rc = databasevm_bind_blob(vm, 4, encoded_value, (uint64_t)encoded_value_len);
+ } else {
+ size_t seek = 0;
+ cloudsync_payload_bind_param_context bind_ctx = {.vm = vm, .param_index = 4};
+ int res = pk_decode((char *)encoded_value, encoded_value_len, 1, &seek, -1, cloudsync_payload_bind_param_callback, &bind_ctx); // decode one value; the callback binds it to parameter 4
+ if (res == -1 || seek != encoded_value_len) rc = cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 encoded value", DBRES_MISUSE); // the single value must consume the whole buffer
+ }
+ }
+ if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 5, col_version);
+ if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 6, db_version);
+ if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 7, site_id, (uint64_t)site_id_len);
+ if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 8, cl);
+ if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 9, seq);
+ if (rc != DBRES_OK) goto cleanup; // the first failing bind stops the chain
+
+ if (!database_in_transaction(data)) { // an enclosing transaction, when present, is left to the caller
+ rc = database_begin_savepoint(data, "cloudsync_payload_apply");
+ if (rc != DBRES_OK) goto cleanup;
+ in_savepoint = true;
+ }
+
+ data->pending_batch = &batch; // NOTE(review): presumably lets the insert defer its merges into batch until merge_flush_pending, confirm
+ rc = databasevm_step(vm);
+ if (rc == DBRES_DONE) rc = DBRES_OK;
+ if (rc != DBRES_OK) {
+ cloudsync_set_dberror(data);
+ goto cleanup;
+ }
+
+ rc = merge_flush_pending(data);
+ if (rc != DBRES_OK) goto cleanup;
+ data->pending_batch = NULL;
+
+ if (in_savepoint) {
+ rc = database_commit_savepoint(data, "cloudsync_payload_apply");
+ in_savepoint = false; // cleared before rc is checked, so a failed commit is not followed by a rollback in cleanup
+ if (rc != DBRES_OK) goto cleanup;
+ }
+
+ // Do NOT advance the receive cursor here: a v3 value carries a single
+ // (db_version, seq) that can be in the middle of its source db_version, and a
+ // db_version's chunks can span multiple /check artifacts. Advancing per value
+ // would leave the cursor mid-db_version. The durable cursor is advanced once,
+ // after the whole payload/stream is applied, via cloudsync_payload_apply's
+ // checkpoint argument. Record the last applied position for the
+ // CLOUDSYNC_CHECKPOINT_LAST_APPLIED mode.
+ if (db_version > data->apply_last_db_version || // lexicographic (db_version, seq) comparison: only move forward
+ (db_version == data->apply_last_db_version && seq > data->apply_last_seq)) {
+ data->apply_last_db_version = db_version;
+ data->apply_last_seq = seq;
+ }
+
+ if (pnrows) *pnrows += 1;
+
+cleanup: // reached on success as well as on failure
+ if (rc != DBRES_OK && in_savepoint) database_rollback_savepoint(data, "cloudsync_payload_apply");
+ data->pending_batch = NULL; // batch is a local: never leave the context pointing at it
+ merge_pending_free_entries(&batch);
+ if (batch.cached_vm) databasevm_finalize(batch.cached_vm);
+ if (batch.cached_col_names) cloudsync_memory_free(batch.cached_col_names);
+ if (batch.entries) cloudsync_memory_free(batch.entries);
+ if (vm) databasevm_finalize(vm);
+ return rc;
+}
+
+static int cloudsync_payload_apply_reassembled_fragment (cloudsync_context *data, const char *value_id, const char *expected_checksum_hex, int *pnrows) { // once every fragment of value_id is stored: rebuild the value, verify it, apply it and delete the fragments; returns DBRES_OK without applying while the group is incomplete
+    int rc = DBRES_OK;
+    dbvm_t *vm = NULL;
+    char *value = NULL;
+    char *tbl = NULL, *col_name = NULL;
+    char *pk = NULL, *site_id = NULL;
+    size_t tbl_len = 0, col_name_len = 0, pk_len = 0, site_id_len = 0;
+    int64_t col_version = 0, db_version = 0, cl = 0, seq = 0;
+    int64_t total_size = 0, copied = 0;
+
+    rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_COUNT, &vm, 0);
+    if (rc != DBRES_OK) return rc;
+    rc = databasevm_bind_text(vm, 1, value_id, -1);
+    if (rc != DBRES_OK) { databasevm_finalize(vm); return rc; }
+    rc = databasevm_step(vm);
+    if (rc != DBRES_ROW) { databasevm_finalize(vm); return (rc == DBRES_DONE) ? DBRES_OK : rc; } // no row means nothing to reassemble; any other result is a real failure and must not look like "still incomplete"
+    int64_t have = database_column_int(vm, 0);
+    int64_t part_count_min = database_column_int(vm, 1);
+    int64_t part_count_max = database_column_int(vm, 2);
+    int64_t total_size_min = database_column_int(vm, 3);
+    int64_t total_size_max = database_column_int(vm, 4);
+    const char *checksum_min = database_column_text(vm, 5);
+    const char *checksum_max = database_column_text(vm, 6);
+    char checksum_min_copy[32] = {0};
+    char checksum_max_copy[32] = {0};
+    if (checksum_min) snprintf(checksum_min_copy, sizeof(checksum_min_copy), "%s", checksum_min); // copied because the statement is finalized before the values are compared
+    if (checksum_max) snprintf(checksum_max_copy, sizeof(checksum_max_copy), "%s", checksum_max);
+    int64_t part_index_min = database_column_int(vm, 7);
+    int64_t part_index_max = database_column_int(vm, 8);
+    databasevm_finalize(vm);
+    vm = NULL;
+    if (have <= 0 || part_count_min <= 0 || have < part_count_max) return DBRES_OK; // group still incomplete: wait for more fragments
+    if (part_count_min != part_count_max || total_size_min != total_size_max || !checksum_min_copy[0] || !checksum_max_copy[0] || // all stored fragments must agree on their metadata
+        strcmp(checksum_min_copy, checksum_max_copy) != 0 || strcmp(checksum_min_copy, expected_checksum_hex) != 0 ||
+        part_index_min != 0 || part_index_max != part_count_max - 1 || have != part_count_max) {
+        return cloudsync_set_error(data, "Error on cloudsync_payload_apply: inconsistent v3 fragments", DBRES_MISUSE);
+    }
+    total_size = total_size_max;
+
+    value = cloudsync_memory_alloc((uint64_t)total_size);
+    if (!value) return DBRES_NOMEM;
+
+    rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_SELECT, &vm, 0);
+    if (rc != DBRES_OK) goto cleanup;
+    rc = databasevm_bind_text(vm, 1, value_id, -1);
+    if (rc != DBRES_OK) goto cleanup;
+
+    uint64_t checksum = 14695981039346656037ULL; // seed passed to cloudsync_checksum_update for the first fragment
+    while ((rc = databasevm_step(vm)) == DBRES_ROW) {
+        size_t frag_len = 0;
+        const char *frag = database_column_blob(vm, 0, &frag_len);
+        if (copied + (int64_t)frag_len > total_size) { rc = DBRES_MISUSE; goto cleanup; } // never write past the declared total size
+        memcpy(value + copied, frag, frag_len);
+        checksum = cloudsync_checksum_update(checksum, frag, frag_len);
+        copied += (int64_t)frag_len;
+
+        if (!tbl) { // first fragment: keep private copies of the row identity
+            const char *t = database_column_text(vm, 1);
+            const char *c = database_column_text(vm, 3);
+            size_t pkl = 0, sidl = 0;
+            const char *p = database_column_blob(vm, 2, &pkl);
+            const char *sid = database_column_blob(vm, 6, &sidl);
+            tbl_len = (size_t)database_column_bytes(vm, 1);
+            col_name_len = (size_t)database_column_bytes(vm, 3);
+            pk_len = pkl;
+            site_id_len = sidl;
+            tbl = cloudsync_memory_alloc((uint64_t)tbl_len);
+            col_name = cloudsync_memory_alloc((uint64_t)col_name_len);
+            pk = cloudsync_memory_alloc((uint64_t)pk_len);
+            site_id = cloudsync_memory_alloc((uint64_t)site_id_len);
+            if (!tbl || !col_name || !pk || !site_id) { rc = DBRES_NOMEM; goto cleanup; }
+            memcpy(tbl, t, tbl_len);
+            memcpy(col_name, c, col_name_len);
+            memcpy(pk, p, pk_len);
+            memcpy(site_id, sid, site_id_len);
+            col_version = database_column_int(vm, 4);
+            db_version = database_column_int(vm, 5);
+            cl = database_column_int(vm, 7);
+            seq = database_column_int(vm, 8);
+        } else { // later fragments must describe exactly the same row
+            size_t pkl = 0, sidl = 0;
+            const char *t = database_column_text(vm, 1);
+            const char *c = database_column_text(vm, 3);
+            const char *p = database_column_blob(vm, 2, &pkl);
+            const char *sid = database_column_blob(vm, 6, &sidl);
+            if ((size_t)database_column_bytes(vm, 1) != tbl_len || memcmp(tbl, t, tbl_len) != 0 ||
+                pkl != pk_len || memcmp(pk, p, pk_len) != 0 ||
+                (size_t)database_column_bytes(vm, 3) != col_name_len || memcmp(col_name, c, col_name_len) != 0 ||
+                database_column_int(vm, 4) != col_version ||
+                database_column_int(vm, 5) != db_version ||
+                sidl != site_id_len || memcmp(site_id, sid, site_id_len) != 0 ||
+                database_column_int(vm, 7) != cl ||
+                database_column_int(vm, 8) != seq) {
+                rc = DBRES_MISUSE;
+                goto cleanup;
+            }
+        }
+    }
+    if (rc == DBRES_DONE) rc = DBRES_OK;
+    if (rc != DBRES_OK) goto cleanup;
+    if (copied != total_size) { rc = DBRES_MISUSE; goto cleanup; } // the fragments must add up to exactly the declared size
+    char checksum_hex[17];
+    snprintf(checksum_hex, sizeof(checksum_hex), "%016" PRIx64, checksum);
+    if (strcmp(checksum_hex, expected_checksum_hex) != 0) { rc = DBRES_MISUSE; goto cleanup; } // reassembled bytes must match the advertised checksum
+    databasevm_finalize(vm);
+    vm = NULL;
+
+    rc = cloudsync_payload_apply_single_decoded_row(data, tbl, tbl_len, pk, pk_len, col_name, col_name_len,
+                                                    value, (size_t)total_size, col_version, db_version,
+                                                    site_id, site_id_len, cl, seq, pnrows);
+    if (rc != DBRES_OK) goto cleanup;
+
+    rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_DELETE, &vm, 0);
+    if (rc == DBRES_OK) {
+        rc = databasevm_bind_text(vm, 1, value_id, -1);
+        if (rc == DBRES_OK) rc = databasevm_step(vm);
+        if (rc == DBRES_DONE) rc = DBRES_OK; // a failed bind or delete is now reported instead of being returned as success
+    }
+
+cleanup:
+    if (vm) databasevm_finalize(vm);
+    if (value) cloudsync_memory_free(value);
+    if (tbl) cloudsync_memory_free(tbl);
+    if (col_name) cloudsync_memory_free(col_name);
+    if (pk) cloudsync_memory_free(pk);
+    if (site_id) cloudsync_memory_free(site_id);
+    return rc;
+}
+
+static int cloudsync_payload_apply_fragment_row (cloudsync_context *data, cloudsync_payload_fragment_row *row, int *pnrows) { // validates one v3 fragment row, stores it with SQL_PAYLOAD_FRAGMENTS_UPSERT, then tries to reassemble and apply its value
+    char value_id[64];
+    char checksum_hex[17];
+    int part_index = 0, part_count = 0;
+    int64_t total_size = 0;
+    const char *base_col = NULL;
+    int64_t base_col_len = 0;
+    if (!row || !row->tbl || row->tbl_len <= 0 || row->tbl_len > INT_MAX || !row->pk || row->pk_len <= 0 || row->pk_len > INT_MAX || // upper bounds: these lengths are narrowed to int below
+        !row->col_name || row->col_name_len <= 0 || row->col_name_len > INT_MAX || !row->col_value || row->col_value_len <= 0 ||
+        !row->site_id || row->site_id_len <= 0 || row->site_id_len > INT_MAX) {
+        return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 payload row", DBRES_MISUSE);
+    }
+    if (!cloudsync_payload_fragment_parse_colname(row->col_name, row->col_name_len, value_id, sizeof(value_id),
+                                                  checksum_hex, sizeof(checksum_hex),
+                                                  &part_index, &part_count, &total_size, &base_col, &base_col_len)) {
+        return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 fragment metadata", DBRES_MISUSE);
+    }
+
+    uint64_t value_checksum = strtoull(checksum_hex, NULL, 16); // checksum_hex was validated as 16 hex characters by the parser
+    char expected_value_id[33];
+    cloudsync_payload_fragment_value_id(expected_value_id, row->tbl, (int)row->tbl_len, row->pk, (int)row->pk_len,
+                                        base_col, (int)base_col_len, row->col_version, row->db_version,
+                                        row->site_id, (int)row->site_id_len, row->cl, row->seq,
+                                        value_checksum, total_size);
+    if (strcmp(value_id, expected_value_id) != 0) { // the id in the column name must match the one recomputed from the row itself
+        return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 fragment identity", DBRES_MISUSE);
+    }
+
+    // the fragments table is guaranteed by dbutils_settings_init; no DDL here
+    // because the apply path runs under sync-only credentials on server nodes
+    int rc = cloudsync_payload_fragments_cleanup_stale(data);
+    if (rc != DBRES_OK) return rc;
+
+    dbvm_t *vm = NULL;
+    rc = databasevm_prepare(data, SQL_PAYLOAD_FRAGMENTS_UPSERT, &vm, 0);
+    if (rc != DBRES_OK) return rc;
+    rc = databasevm_bind_text(vm, 1, value_id, -1); // every bind is checked: the first failure skips the rest and the step
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 2, part_index);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 3, part_count);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 4, total_size);
+    if (rc == DBRES_OK) rc = databasevm_bind_text(vm, 5, checksum_hex, -1);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 6, (int64_t)time(NULL)); // timestamp compared against the cutoff by the stale-fragment cleanup
+    if (rc == DBRES_OK) rc = databasevm_bind_text(vm, 7, row->tbl, (int)row->tbl_len);
+    if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 8, row->pk, (uint64_t)row->pk_len);
+    if (rc == DBRES_OK) rc = databasevm_bind_text(vm, 9, base_col, (int)base_col_len);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 10, row->col_version);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 11, row->db_version);
+    if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 12, row->site_id, (uint64_t)row->site_id_len);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 13, row->cl);
+    if (rc == DBRES_OK) rc = databasevm_bind_int(vm, 14, row->seq);
+    if (rc == DBRES_OK) rc = databasevm_bind_blob(vm, 15, row->col_value, (uint64_t)row->col_value_len);
+    if (rc == DBRES_OK) rc = databasevm_step(vm);
+    databasevm_finalize(vm); // finalized on both the success and the failure path
+    if (rc == DBRES_DONE) rc = DBRES_OK;
+    if (rc != DBRES_OK) return rc;
+
+    return cloudsync_payload_apply_reassembled_fragment(data, value_id, checksum_hex, pnrows); // applies the value only when this fragment completed its group
+}
+
// #ifndef CLOUDSYNC_OMIT_RLS_VALIDATION
-int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int blen, int *pnrows) {
+// Advance the durable receive cursor (check_dbversion/check_seq) after a payload
+// (or a fully-applied chunk stream) has been applied. See the checkpoint-mode
+// documentation on cloudsync_payload_apply in cloudsync.h. The advance is
+// strictly monotonic so re-delivered rows never regress the cursor.
+static void cloudsync_payload_apply_checkpoint (cloudsync_context *data, int64_t checkpoint_db_version, int64_t checkpoint_seq) { // checkpoint_db_version is either a mode constant or the explicit target db_version
+ int64_t target_db_version;
+ int64_t target_seq;
+
+ if (checkpoint_db_version == CLOUDSYNC_CHECKPOINT_NONE) return; // caller asked to leave the cursor untouched
+ if (checkpoint_db_version == CLOUDSYNC_CHECKPOINT_LAST_APPLIED) {
+ // Nothing applied -> nothing to checkpoint.
+ if (data->apply_last_db_version < 0) return;
+ target_db_version = data->apply_last_db_version; // position recorded while rows were applied
+ target_seq = data->apply_last_seq;
+ } else {
+ target_db_version = checkpoint_db_version; // explicit target supplied by the caller
+ target_seq = checkpoint_seq;
+ }
+
+ int64_t cur_db_version = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION); // currently stored cursor
+ int64_t cur_seq = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_CHECK_SEQ);
+
+ // monotonic guard: never move the cursor backwards
+ if (target_db_version < cur_db_version) return;
+ if (target_db_version == cur_db_version && target_seq <= cur_seq) return; // an equal position is a no-op too
+
+ char buf[256]; // both settings are written as decimal text
+ snprintf(buf, sizeof(buf), "%" PRId64, target_db_version);
+ dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION, buf); // result is not checked; the function returns void
+ if (target_seq != cur_seq) { // check_seq is rewritten only when it differs from the stored value
+ snprintf(buf, sizeof(buf), "%" PRId64, target_seq);
+ dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_SEQ, buf);
+ }
+}
+
+int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int blen, int *pnrows, int64_t checkpoint_db_version, int64_t checkpoint_seq) {
// Guard against calling payload_apply before cloudsync_init: without this,
// the settings lookups at the top of this function would each emit a
// "no such table: cloudsync_settings" debug line, control would fall
@@ -3226,7 +4049,12 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
// sanity check
if (blen < (int)sizeof(cloudsync_payload_header)) return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid payload length", DBRES_MISUSE);
-
+
+ // track the last (db_version, seq) applied by this call so the receive
+ // checkpoint can be computed once, after the whole payload is applied
+ data->apply_last_db_version = -1;
+ data->apply_last_seq = -1;
+
// decode header
cloudsync_payload_header header;
memcpy(&header, payload, sizeof(cloudsync_payload_header));
@@ -3243,7 +4071,7 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
if (header.schema_hash != data->schema_hash) {
if (!database_check_schema_hash(data, header.schema_hash)) {
char buffer[1024];
- snprintf(buffer, sizeof(buffer), "Cannot apply the received payload because the schema hash is unknown %llu.", header.schema_hash);
+ snprintf(buffer, sizeof(buffer), "Cannot apply the received payload because the schema hash is unknown %" PRIu64 ".", header.schema_hash);
return cloudsync_set_error(data, buffer, DBRES_MISUSE);
}
}
@@ -3253,6 +4081,9 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
if ((header.signature != CLOUDSYNC_PAYLOAD_SIGNATURE) || (header.ncols == 0)) {
return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid signature or column size", DBRES_MISUSE);
}
+ if (header.version < CLOUDSYNC_PAYLOAD_VERSION_1 || header.version > CLOUDSYNC_PAYLOAD_VERSION_3) {
+ return cloudsync_set_error(data, "Error on cloudsync_payload_apply: unsupported payload version", DBRES_MISUSE);
+ }
const char *buffer = payload + sizeof(cloudsync_payload_header);
size_t buf_len = (size_t)blen - sizeof(cloudsync_payload_header);
@@ -3280,7 +4111,39 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
buffer = (const char *)clone;
buf_len = (size_t)header.expanded_size;
}
-
+
+ if (header.version == CLOUDSYNC_PAYLOAD_VERSION_3) {
+ int rc = DBRES_OK;
+ int applied_rows = 0;
+ if (header.ncols != CLOUDSYNC_CHANGES_NCOLS) {
+ if (clone) cloudsync_memory_free(clone);
+ return cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 column count", DBRES_MISUSE);
+ }
+ for (uint32_t i = 0; i < header.nrows; ++i) {
+ size_t seek = 0;
+ cloudsync_payload_fragment_row row = {0};
+ int res = pk_decode((char *)buffer, buf_len, header.ncols, &seek, -1,
+ cloudsync_payload_fragment_decode_callback, &row);
+ if (res == -1 || seek == 0 || seek > buf_len) {
+ rc = cloudsync_set_error(data, "Error on cloudsync_payload_apply: invalid v3 payload row", DBRES_MISUSE);
+ break;
+ }
+ int n = 0;
+ rc = cloudsync_payload_apply_fragment_row(data, &row, &n);
+ if (rc != DBRES_OK) break;
+ applied_rows += n;
+ buffer += seek;
+ buf_len -= seek;
+ }
+ if (clone) cloudsync_memory_free(clone);
+ if (pnrows) *pnrows = applied_rows;
+ // Advance the receive cursor only after the whole payload is applied,
+ // gated on the caller-supplied checkpoint (a non-final chunk passes
+ // CLOUDSYNC_CHECKPOINT_NONE and leaves the cursor untouched).
+ if (rc == DBRES_OK) cloudsync_payload_apply_checkpoint(data, checkpoint_db_version, checkpoint_seq);
+ return rc;
+ }
+
// precompile the insert statement
dbvm_t *vm = NULL;
int rc = databasevm_prepare(data, SQL_CHANGES_INSERT_ROW, &vm, 0);
@@ -3293,8 +4156,6 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
uint16_t ncols = header.ncols;
uint32_t nrows = header.nrows;
int64_t last_payload_db_version = -1;
- int dbversion = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION);
- int seq = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_SEQ);
cloudsync_pk_decode_bind_context decoded_context = {.vm = vm};
// Initialize deferred column-batch merge
@@ -3407,16 +4268,15 @@ int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int b
if (rc == DBRES_DONE) rc = DBRES_OK;
if (rc == DBRES_OK) {
- char buf[256];
- if (decoded_context.db_version >= dbversion) {
- snprintf(buf, sizeof(buf), "%" PRId64, decoded_context.db_version);
- dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_DBVERSION, buf);
-
- if (decoded_context.seq != seq) {
- snprintf(buf, sizeof(buf), "%" PRId64, decoded_context.seq);
- dbutils_settings_set_key_value(data, CLOUDSYNC_KEY_CHECK_SEQ, buf);
- }
+ // Record the last applied (db_version, seq) and advance the receive cursor
+ // once, gated on the caller-supplied checkpoint. A non-final chunk passes
+ // CLOUDSYNC_CHECKPOINT_NONE so the cursor never lands mid-db_version.
+ if (decoded_context.db_version > data->apply_last_db_version ||
+ (decoded_context.db_version == data->apply_last_db_version && decoded_context.seq > data->apply_last_seq)) {
+ data->apply_last_db_version = decoded_context.db_version;
+ data->apply_last_seq = decoded_context.seq;
}
+ cloudsync_payload_apply_checkpoint(data, checkpoint_db_version, checkpoint_seq);
}
cleanup:
diff --git a/src/cloudsync.h b/src/cloudsync.h
index 56c4d2b8..2abc057f 100644
--- a/src/cloudsync.h
+++ b/src/cloudsync.h
@@ -18,7 +18,7 @@
extern "C" {
#endif
-#define CLOUDSYNC_VERSION "1.0.20"
+#define CLOUDSYNC_VERSION "1.1.0"
#define CLOUDSYNC_MAX_TABLENAME_LEN 512
#define CLOUDSYNC_VALUE_NOTSET -1
@@ -26,6 +26,33 @@ extern "C" {
#define CLOUDSYNC_RLS_RESTRICTED_VALUE "__[RLS]__"
#define CLOUDSYNC_DISABLE_ROWIDONLY_TABLES 1
#define CLOUDSYNC_DEFAULT_ALGO "cls"
+#define CLOUDSYNC_PAYLOAD_CHUNK_DEFAULT_SIZE (5 * 1024 * 1024)
+#define CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE (256 * 1024)
+// Hard ceiling on the effective chunk size, regardless of the per-database
+// payload_max_chunk_size setting. Protects the server (one chunk is built in
+// memory and stored as a single artifact) and the tenant from a misconfigured
+// value. Large TEXT/BLOB values still sync above this size: they are split
+// across chunks by the fragment path. Only a row whose non-fragmentable
+// scaffolding (primary key + column name + metadata, replicated into every
+// fragment) exceeds the chunk size hits row_too_large, which is practically
+// unreachable.
+#define CLOUDSYNC_PAYLOAD_CHUNK_MAX_SIZE (32 * 1024 * 1024)
+#define CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN (16 * 1024)
+// Fragment sizing is a small fixpoint: after the first target estimate, only
+// decimal metadata widths for part_index/part_count can change, so eight passes
+// is ample while still preventing an accidental unbounded planning loop.
+#define CLOUDSYNC_PAYLOAD_FRAGMENT_SIZE_FIXPOINT_ITERATIONS 8
+
+// Machine-parseable error-code tokens. These prefix the human-readable text of
+// permanent (non-retryable) failures so the CloudSync server can classify them
+// from the error message alone — the only signal common to both the Postgres
+// (pgconn.PgError.Message) and SQLite (result error text) backends. The server
+// parses the bracketed code with /cloudsync_error\[([a-z0-9_]+)\]/ and decides
+// retry policy; keep these strings stable and identical across backends. They
+// carry a trailing ": " so they concatenate directly onto a message literal.
+#define CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_error[payload_too_large]: "
+#define CLOUDSYNC_ERRCODE_ROW_TOO_LARGE "cloudsync_error[row_too_large]: "
+#define CLOUDSYNC_ERRCODE_CHUNK_TOO_LARGE "cloudsync_error[chunk_too_large]: "
#define CLOUDSYNC_CHANGES_NCOLS 9
@@ -87,13 +114,57 @@ const char *cloudsync_schema (cloudsync_context *data);
const char *cloudsync_table_schema (cloudsync_context *data, const char *table_name);
// Payload
-int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int blen, int *nrows);
+// Receive-checkpoint modes for cloudsync_payload_apply's checkpoint_db_version
+// argument. The receive cursor (check_dbversion/check_seq) must only ever land
+// on a complete db_version boundary, otherwise a stop between chunks of a single
+// source db_version silently skips the unapplied rows on the next /check (the
+// server's cloudsync_payload_chunks uses db_version > since with no seq cursor).
+// >= 0 advance the cursor to exactly this
+// (watermark_db_version), with checkpoint_seq.
+// Used once a chunk stream is fully applied.
+// CLOUDSYNC_CHECKPOINT_NONE do not advance the cursor. Used for a
+// non-final chunk of a multi-chunk stream.
+// CLOUDSYNC_CHECKPOINT_LAST_APPLIED advance to this artifact's last applied
+// (db_version, seq). Legacy/monolithic
+// behavior: safe only for a complete payload
+// that ends on a db_version boundary.
+#define CLOUDSYNC_CHECKPOINT_NONE (-1)
+#define CLOUDSYNC_CHECKPOINT_LAST_APPLIED (-2)
+int cloudsync_payload_apply (cloudsync_context *data, const char *payload, int blen, int *nrows, int64_t checkpoint_db_version, int64_t checkpoint_seq);
int cloudsync_payload_encode_step (cloudsync_payload_context *payload, cloudsync_context *data, int argc, dbvalue_t **argv);
int cloudsync_payload_encode_final (cloudsync_payload_context *payload, cloudsync_context *data);
char *cloudsync_payload_blob (cloudsync_payload_context *payload, int64_t *blob_size, int64_t *nrows);
size_t cloudsync_payload_context_size (size_t *header_size);
+void cloudsync_payload_context_free (cloudsync_payload_context *payload);
+uint64_t cloudsync_payload_context_nrows (cloudsync_payload_context *payload);
+size_t cloudsync_payload_context_bused (cloudsync_payload_context *payload);
int cloudsync_payload_get (cloudsync_context *data, char **blob, int *blob_size, int *db_version, int64_t *new_db_version);
int cloudsync_payload_save (cloudsync_context *data, const char *payload_path, int *blob_size); // available only on Desktop OS (no WASM, no mobile)
+int cloudsync_payload_max_chunk_size (cloudsync_context *data);
+int cloudsync_payload_encode_fragment_step (cloudsync_payload_context *payload, cloudsync_context *data,
+ const char *tbl, int tbl_len,
+ const void *pk, int pk_len,
+ const char *col_name, int col_name_len,
+ const void *fragment, int fragment_len,
+ int64_t col_version, int64_t db_version,
+ const void *site_id, int site_id_len,
+ int64_t cl, int64_t seq,
+ uint64_t value_checksum,
+ int64_t total_size,
+ int part_index, int part_count);
+int cloudsync_payload_fragment_target_size (cloudsync_context *data);
+int cloudsync_payload_fragment_count (int64_t total_size, int target_size);
+int cloudsync_payload_fragment_data_size (cloudsync_context *data,
+ const char *tbl, int tbl_len,
+ const void *pk, int pk_len,
+ const char *col_name, int col_name_len,
+ int64_t col_version, int64_t db_version,
+ const void *site_id, int site_id_len,
+ int64_t cl, int64_t seq,
+ int64_t total_size,
+ int part_index, int part_count);
+uint64_t cloudsync_payload_encoded_value_checksum (dbvalue_t *value);
+int cloudsync_payload_encoded_value_header (dbvalue_t *value, char *header, int header_cap, int64_t *payload_len);
// CloudSync table context
int cloudsync_refill_metatable (cloudsync_context *data, const char *table_name);
diff --git a/src/dbutils.c b/src/dbutils.c
index 4e565fe3..0d923881 100644
--- a/src/dbutils.c
+++ b/src/dbutils.c
@@ -144,7 +144,13 @@ int dbutils_settings_get_value (cloudsync_context *data, const char *key, char *
// INT case
if (intvalue) {
- *intvalue = database_column_int(vm, 0);
+ int type = database_column_type(vm, 0);
+ if (type == DBTYPE_TEXT) {
+ const char *value = database_column_text(vm, 0);
+ *intvalue = value ? strtoll(value, NULL, 10) : 0;
+ } else {
+ *intvalue = database_column_int(vm, 0);
+ }
goto finalize_get_value;
}
@@ -476,12 +482,22 @@ int dbutils_settings_init (cloudsync_context *data) {
bool schema_versions_exists = database_internal_table_exists(data, CLOUDSYNC_SCHEMA_VERSIONS_NAME);
if (schema_versions_exists == false) {
DEBUG_SETTINGS("cloudsync_schema_versions does not exist (creating a new one)");
-
+
// create table
rc = database_exec(data, SQL_CREATE_SCHEMA_VERSIONS_TABLE);
if (rc != DBRES_OK) return rc;
}
-
+
+ // check if cloudsync_payload_fragments table exists
+ // created at init time because the apply path runs under sync-only
+ // credentials that lack DDL rights on server nodes
+ if (database_internal_table_exists(data, CLOUDSYNC_PAYLOAD_FRAGMENTS_NAME) == false) {
+ DEBUG_SETTINGS("cloudsync_payload_fragments does not exist (creating a new one)");
+
+ rc = database_exec(data, SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE);
+ if (rc != DBRES_OK) return rc;
+ }
+
// cloudsync_settings table exists so load it
dbutils_settings_load(data);
diff --git a/src/dbutils.h b/src/dbutils.h
index 472469a2..b9394fea 100644
--- a/src/dbutils.h
+++ b/src/dbutils.h
@@ -15,6 +15,7 @@
#define CLOUDSYNC_SITEID_NAME "cloudsync_site_id"
#define CLOUDSYNC_TABLE_SETTINGS_NAME "cloudsync_table_settings"
#define CLOUDSYNC_SCHEMA_VERSIONS_NAME "cloudsync_schema_versions"
+#define CLOUDSYNC_PAYLOAD_FRAGMENTS_NAME "cloudsync_payload_fragments"
#define CLOUDSYNC_KEY_LIBVERSION "version"
#define CLOUDSYNC_KEY_SCHEMAVERSION "schemaversion"
@@ -26,6 +27,7 @@
#define CLOUDSYNC_KEY_DEBUG "debug"
#define CLOUDSYNC_KEY_ALGO "algo"
#define CLOUDSYNC_KEY_SKIP_SCHEMA_HASH_CHECK "skip_schema_hash_check"
+#define CLOUDSYNC_KEY_PAYLOAD_MAX_CHUNK_SIZE "payload_max_chunk_size"
// settings
int dbutils_settings_init (cloudsync_context *data);
diff --git a/src/network/network.c b/src/network/network.c
index 652f96c5..038adffd 100644
--- a/src/network/network.c
+++ b/src/network/network.c
@@ -9,6 +9,7 @@
#include
#include
+#include
#include
#include
#include
@@ -62,6 +63,13 @@ static size_t cacert_len = sizeof(cacert_pem) - 1;
#define DEFAULT_SYNC_WAIT_MS 100
#define DEFAULT_SYNC_MAX_RETRIES 1
+
+// Safety bound on how many chunks a single cloudsync_network_sync call will drain
+// back-to-back. The drain is normally terminated by the server's "final" flag; this
+// only guards against a buggy/never-final server. On hit, the drain stops with
+// complete=false and the durable receive cursor is left unadvanced past un-applied
+// rows, so the next sync resumes safely (apply is idempotent).
+#define CLOUDSYNC_CHECK_MAX_CHUNKS_PER_DRAIN 10000
#define MAX_QUERY_VALUE_LEN 256
@@ -82,6 +90,13 @@ struct network_data {
char *apply_endpoint;
char *status_endpoint;
int ticket_enabled;
+ // Best-effort spool cursor for the chunked /check download drain. The durable
+ // receive cursor (check_dbversion/check_seq) is frozen at "since" for the whole
+ // drain, so the server (which is stateless across /check calls) needs the client
+ // to echo which spool page to serve next. In-memory only: losing it just
+ // restarts the drain from page 0, which is safe because apply is idempotent.
+ int64_t check_cursor; // next page index to request (0 = fresh drain)
+ int64_t check_cursor_since; // the check_dbversion check_cursor belongs to
#ifndef CLOUDSYNC_OMIT_CURL
CURL *api_curl;
CURL *artifact_curl;
@@ -479,7 +494,7 @@ NETWORK_RESULT network_receive_buffer (network_data *data, const char *endpoint,
CURLcode rc = curl_easy_setopt(curl, CURLOPT_URL, endpoint);
if (rc != CURLE_OK) goto cleanup;
-
+
// set PEM
#ifdef __ANDROID__
struct curl_blob pem_blob = {
@@ -725,9 +740,11 @@ int network_set_sqlite_result (sqlite3_context *context, NETWORK_RESULT *result)
// on the sqlite3_context. This lets composite callers (cloudsync_network_sync)
// surface apply errors as structured JSON. Endpoint/network errors always raise
// a SQL error regardless of err_out.
-int network_download_changes (sqlite3_context *context, const char *download_url, int *pnrows, char **err_out) {
+int network_download_changes (sqlite3_context *context, const char *download_url, int *pnrows, char **err_out, int64_t checkpoint_db_version, int64_t checkpoint_seq, int64_t *pbytes) {
DEBUG_FUNCTION("network_download_changes");
+ if (pbytes) *pbytes = 0;
+
cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
network_data *netdata = (network_data *)cloudsync_auxdata(data);
if (!netdata) {
@@ -739,7 +756,8 @@ int network_download_changes (sqlite3_context *context, const char *download_url
int rc = SQLITE_OK;
if (result.code == CLOUDSYNC_NETWORK_BUFFER) {
- rc = cloudsync_payload_apply(data, result.buffer, (int)result.blen, pnrows);
+ if (pbytes) *pbytes = (int64_t)result.blen;
+ rc = cloudsync_payload_apply(data, result.buffer, (int)result.blen, pnrows, checkpoint_db_version, checkpoint_seq);
if (rc != DBRES_OK) {
const char *msg = cloudsync_errmsg(data);
if (!msg || !msg[0]) msg = "cloudsync_payload_apply failed";
@@ -760,6 +778,22 @@ int network_download_changes (sqlite3_context *context, const char *download_url
return rc;
}
+// Applies an in-memory payload through cloudsync_payload_apply and reports a
+// failure either to the caller or to SQLite.
+//
+// context               - SQL function context; its user data is the sync context
+// payload, payload_size - serialized payload bytes and their length
+// pnrows                - optional out; passed to the apply call, forced to 0 on failure
+// err_out               - optional out; when non-NULL it receives a duplicated error
+//                         message, otherwise the message is raised on context
+// checkpoint_db_version,
+// checkpoint_seq        - forwarded unchanged to cloudsync_payload_apply
+// error_prefix          - despite the name, used verbatim as the fallback message
+//                         when the sync context has no error text
+//
+// Returns the code produced by cloudsync_payload_apply.
+static int network_apply_payload_buffer(sqlite3_context *context, const char *payload, int payload_size,
+                                        int *pnrows, char **err_out,
+                                        int64_t checkpoint_db_version, int64_t checkpoint_seq,
+                                        const char *error_prefix) {
+    cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
+    int rc = cloudsync_payload_apply(data, payload, payload_size, pnrows, checkpoint_db_version, checkpoint_seq);
+    if (rc == DBRES_OK) return rc;
+
+    // Prefer the message recorded by the apply call; fall back to the caller's text.
+    const char *msg = cloudsync_errmsg(data);
+    if (msg == NULL || msg[0] == '\0') {
+        msg = (error_prefix != NULL) ? error_prefix : "cloudsync_payload_apply failed";
+    }
+
+    if (err_out != NULL) {
+        *err_out = cloudsync_string_dup(msg);
+    } else {
+        sqlite3_result_error(context, msg, -1);
+    }
+    if (pnrows != NULL) *pnrows = 0;
+    return rc;
+}
+
char *network_authentication_token (const char *key, const char *value) {
size_t len = strlen(key) + strlen(value) + 64;
char *buffer = cloudsync_memory_zeroalloc(len);
@@ -773,8 +807,6 @@ char *network_authentication_token (const char *key, const char *value) {
// MARK: - JSON helpers (jsmn) -
-#define JSMN_MAX_TOKENS 64
-
static bool jsmn_token_eq(const char *json, const jsmntok_t *tok, const char *s) {
return (tok->type == JSMN_STRING &&
(int)strlen(s) == tok->end - tok->start &&
@@ -788,6 +820,57 @@ static int jsmn_find_key(const char *json, const jsmntok_t *tokens, int ntokens,
return -1;
}
+// Returns how many consecutive tokens belong to tokens[index]: the token itself
+// plus every token directly after it whose [start, end] range lies inside the
+// range of tokens[index]. Returns 0 when tokens is NULL or index is out of range.
+static int jsmn_token_span(const jsmntok_t *tokens, int ntokens, int index) {
+    if (tokens == NULL || index < 0 || index >= ntokens) return 0;
+
+    const jsmntok_t *root = &tokens[index];
+    int count = 1;
+    for (int next = index + 1; next < ntokens; next++) {
+        const jsmntok_t *tok = &tokens[next];
+        // The first token that is not contained in root ends the span.
+        if (tok->start < root->start || tok->end > root->end) break;
+        count++;
+    }
+    return count;
+}
+
+// Looks up key among the direct members of the object token at object_index.
+//
+// The object's size field is treated as its number of key/value pairs. Each
+// value is stepped over with jsmn_token_span, so keys nested inside a value
+// are never compared.
+//
+// value_index - optional out; on a match receives the index of the token that
+//               follows the matching key
+//
+// Returns true on a match; false when an argument is NULL, object_index is out
+// of range or not a JSMN_OBJECT, or no direct key matches.
+static bool jsmn_find_object_value(const char *json, const jsmntok_t *tokens, int ntokens,
+                                   int object_index, const char *key, int *value_index) {
+    if (json == NULL || tokens == NULL || key == NULL) return false;
+    if (object_index < 0 || object_index >= ntokens) return false;
+    if (tokens[object_index].type != JSMN_OBJECT) return false;
+
+    const int pair_count = tokens[object_index].size;
+    int key_pos = object_index + 1;
+    int seen = 0;
+    while (seen < pair_count && key_pos + 1 < ntokens) {
+        int value_pos = key_pos + 1;
+        if (jsmn_token_eq(json, &tokens[key_pos], key)) {
+            if (value_index != NULL) *value_index = value_pos;
+            return true;
+        }
+        // Jump past the whole value (including anything nested in it) to the next key.
+        key_pos = value_pos + jsmn_token_span(tokens, ntokens, value_pos);
+        seen++;
+    }
+    return false;
+}
+
+// Tokenizes json into a heap array sized to fit the document.
+//
+// The text is parsed twice: once with no token storage to learn the token
+// count, then again into an array of exactly that many tokens.
+//
+// ntokens_out - required out; set to 0 on entry and to the token count on success
+//
+// Returns the token array (release with cloudsync_memory_free), or NULL when an
+// argument is missing, json_len is 0, either parse yields fewer than one token,
+// or the allocation fails.
+static jsmntok_t *json_parse_tokens_alloc(const char *json, size_t json_len, int *ntokens_out) {
+    if (ntokens_out != NULL) *ntokens_out = 0;
+    if (ntokens_out == NULL || json == NULL || json_len == 0) return NULL;
+
+    // Pass 1: count only.
+    jsmn_parser counter;
+    jsmn_init(&counter);
+    const int needed = jsmn_parse(&counter, json, json_len, NULL, 0);
+    if (needed < 1) return NULL;
+
+    jsmntok_t *tokens = cloudsync_memory_alloc((uint64_t)needed * sizeof(jsmntok_t));
+    if (tokens == NULL) return NULL;
+
+    // Pass 2: fill the array from a freshly initialized parser.
+    jsmn_parser filler;
+    jsmn_init(&filler);
+    const int filled = jsmn_parse(&filler, json, json_len, tokens, (unsigned int)needed);
+    if (filled >= 1) {
+        *ntokens_out = filled;
+        return tokens;
+    }
+
+    cloudsync_memory_free(tokens);
+    return NULL;
+}
+
static char *json_unescape_string(const char *src, int len) {
char *out = cloudsync_memory_zeroalloc(len + 1);
if (!out) return NULL;
@@ -827,55 +910,75 @@ static char *json_unescape_string(const char *src, int len) {
static char *json_extract_string(const char *json, size_t json_len, const char *key) {
if (!json || json_len == 0 || !key) return NULL;
- jsmn_parser parser;
- jsmntok_t tokens[JSMN_MAX_TOKENS];
- jsmn_init(&parser);
- int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS);
- if (ntokens < 1) return NULL;
+ int ntokens = 0;
+ jsmntok_t *tokens = json_parse_tokens_alloc(json, json_len, &ntokens);
+ if (!tokens) return NULL;
+ char *result = NULL;
int i = jsmn_find_key(json, tokens, ntokens, key);
- if (i < 0 || i + 1 >= ntokens) return NULL;
-
- jsmntok_t *val = &tokens[i + 1];
- if (val->type != JSMN_STRING) return NULL;
-
- return json_unescape_string(json + val->start, val->end - val->start);
+ if (i >= 0 && i + 1 < ntokens) {
+ jsmntok_t *val = &tokens[i + 1];
+ if (val->type == JSMN_STRING)
+ result = json_unescape_string(json + val->start, val->end - val->start);
+ }
+ cloudsync_memory_free(tokens);
+ return result;
}
static int64_t json_extract_int(const char *json, size_t json_len, const char *key, int64_t default_value) {
if (!json || json_len == 0 || !key) return default_value;
- jsmn_parser parser;
- jsmntok_t tokens[JSMN_MAX_TOKENS];
- jsmn_init(&parser);
- int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS);
- if (ntokens < 1 || tokens[0].type != JSMN_OBJECT) return default_value;
+ int ntokens = 0;
+ jsmntok_t *tokens = json_parse_tokens_alloc(json, json_len, &ntokens);
+ if (!tokens) return default_value;
- int i = jsmn_find_key(json, tokens, ntokens, key);
- if (i < 0 || i + 1 >= ntokens) return default_value;
+ int64_t result = default_value;
+ if (tokens[0].type == JSMN_OBJECT) {
+ int i = jsmn_find_key(json, tokens, ntokens, key);
+ if (i >= 0 && i + 1 < ntokens && tokens[i + 1].type == JSMN_PRIMITIVE)
+ result = strtoll(json + tokens[i + 1].start, NULL, 10);
+ }
+ cloudsync_memory_free(tokens);
+ return result;
+}
- jsmntok_t *val = &tokens[i + 1];
- if (val->type != JSMN_PRIMITIVE) return default_value;
+static bool json_extract_bool(const char *json, size_t json_len, const char *key, bool default_value) {
+ if (!json || json_len == 0 || !key) return default_value;
- return strtoll(json + val->start, NULL, 10);
+ int ntokens = 0;
+ jsmntok_t *tokens = json_parse_tokens_alloc(json, json_len, &ntokens);
+ if (!tokens) return default_value;
+
+ bool result = default_value;
+ if (tokens[0].type == JSMN_OBJECT) {
+ int i = jsmn_find_key(json, tokens, ntokens, key);
+ if (i >= 0 && i + 1 < ntokens && tokens[i + 1].type == JSMN_PRIMITIVE) {
+ // JSON booleans (true/false) and numeric flags (1/0) are both accepted.
+ char c = json[tokens[i + 1].start];
+ if (c == 't' || c == 'T') result = true;
+ else if (c == 'f' || c == 'F' || c == 'n' || c == 'N') result = false;
+ else result = strtoll(json + tokens[i + 1].start, NULL, 10) != 0;
+ }
+ }
+ cloudsync_memory_free(tokens);
+ return result;
}
static int json_extract_array_size(const char *json, size_t json_len, const char *key) {
if (!json || json_len == 0 || !key) return -1;
- jsmn_parser parser;
- jsmntok_t tokens[JSMN_MAX_TOKENS];
- jsmn_init(&parser);
- int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS);
- if (ntokens < 1 || tokens[0].type != JSMN_OBJECT) return -1;
-
- int i = jsmn_find_key(json, tokens, ntokens, key);
- if (i < 0 || i + 1 >= ntokens) return -1;
-
- jsmntok_t *val = &tokens[i + 1];
- if (val->type != JSMN_ARRAY) return -1;
+ int ntokens = 0;
+ jsmntok_t *tokens = json_parse_tokens_alloc(json, json_len, &ntokens);
+ if (!tokens) return -1;
- return val->size;
+ int result = -1;
+ if (tokens[0].type == JSMN_OBJECT) {
+ int i = jsmn_find_key(json, tokens, ntokens, key);
+ if (i >= 0 && i + 1 < ntokens && tokens[i + 1].type == JSMN_ARRAY)
+ result = tokens[i + 1].size;
+ }
+ cloudsync_memory_free(tokens);
+ return result;
}
// Escape a string for safe embedding as a JSON string value (without surrounding quotes).
@@ -918,25 +1021,24 @@ static char *json_escape_string(const char *src) {
static char *json_extract_object_raw(const char *json, size_t json_len, const char *key) {
if (!json || json_len == 0 || !key) return NULL;
- jsmn_parser parser;
- jsmntok_t tokens[JSMN_MAX_TOKENS];
- jsmn_init(&parser);
- int ntokens = jsmn_parse(&parser, json, json_len, tokens, JSMN_MAX_TOKENS);
- if (ntokens < 1) return NULL;
+ int ntokens = 0;
+ jsmntok_t *tokens = json_parse_tokens_alloc(json, json_len, &ntokens);
+ if (!tokens) return NULL;
+ char *out = NULL;
int i = jsmn_find_key(json, tokens, ntokens, key);
- if (i < 0 || i + 1 >= ntokens) return NULL;
-
- jsmntok_t *val = &tokens[i + 1];
- if (val->type != JSMN_OBJECT) return NULL;
-
- int len = val->end - val->start;
- if (len <= 0) return NULL;
-
- char *out = cloudsync_memory_zeroalloc(len + 1);
- if (!out) return NULL;
- memcpy(out, json + val->start, len);
- out[len] = '\0';
+ if (i >= 0 && i + 1 < ntokens) {
+ jsmntok_t *val = &tokens[i + 1];
+ int len = val->end - val->start;
+ if (val->type == JSMN_OBJECT && len > 0) {
+ out = cloudsync_memory_zeroalloc(len + 1);
+ if (out) {
+ memcpy(out, json + val->start, len);
+ out[len] = '\0';
+ }
+ }
+ }
+ cloudsync_memory_free(tokens);
return out;
}
@@ -1192,7 +1294,12 @@ static char *network_get_affected_tables(sqlite3 *db, int64_t since_db_version)
// - Server-reported failures from the SyncStatusResponse failures object are
// forwarded as send.lastFailure (failures.apply) and receive.lastFailure
// (failures.check). Per-function scoping: send_changes emits send.lastFailure
-// only; check_changes emits receive.lastFailure only; sync emits both.
+// only; receive_changes emits receive.lastFailure only; sync emits both.
+// - A non-retryable failures.check (retryable:false) is a permanent
+// configuration/authorization problem: the receive drain stops polling
+// immediately rather than waiting it out. receive_changes raises it as a SQL
+// error (fail fast — no send block to preserve); sync still emits structured
+// JSON with receive.lastFailure so its send block survives.
//
// Callers that receive JSON can trust that the server was reachable.
// A SQL error means connectivity or configuration is broken.
@@ -1205,6 +1312,16 @@ typedef struct {
char *tables_json; // JSON array of affected table names, caller must cloudsync_memory_free
char *apply_failure_json; // raw JSON object for server-reported failures.apply, caller must cloudsync_memory_free
char *check_failure_json; // raw JSON object for server-reported failures.check, caller must cloudsync_memory_free
+ // Per-call chunked-download signals written by cloudsync_network_check_internal.
+ bool page_delivered; // one or more url/inline chunks were fetched + applied this call (rc==OK)
+ bool more_pending; // this was a non-final chunked batch with a next cursor (drain should continue)
+ bool defer_tables; // input: when true, check_internal skips its per-call tables query
+ // so a multi-chunk drain can compute the union once at the end
+ int64_t bytes_received; // serialized payload bytes received this call (0 on 202/no page)
+ int chunks_received; // payload chunks received this call (0 on 202/no page)
+ // Send-path transport volume, written by cloudsync_network_send_changes_internal.
+ int send_chunks; // number of payload chunks sent this call
+ int64_t send_bytes; // serialized payload bytes sent this call
} sync_result;
// Returns a malloc'd raw JSON copy of failures. ("apply" or "check"),
@@ -1255,14 +1372,142 @@ static char *network_base64_encode(const unsigned char *src, size_t len) {
return out;
}
-static char *network_apply_json_payload(const char *transport_key, const char *transport_value,
- int db_version_min, int db_version_max) {
- if (!transport_key || !transport_value) return NULL;
+// Maps one character of the standard base64 alphabet (A-Z, a-z, 0-9, '+', '/')
+// to its 6-bit value 0..63. Returns -1 for any other character, including '='.
+static int network_base64_value(char c) {
+    switch (c) {
+        case '+': return 62;
+        case '/': return 63;
+        default: break;
+    }
+
+    int value = -1;
+    if (c >= 'A' && c <= 'Z') {
+        value = c - 'A';
+    } else if (c >= 'a' && c <= 'z') {
+        value = 26 + (c - 'a');
+    } else if (c >= '0' && c <= '9') {
+        value = 52 + (c - '0');
+    }
+    return value;
+}
+
+// Decodes base64 text into a newly allocated byte buffer.
+//
+// Whitespace (per isspace) is skipped wherever it appears. The remaining
+// characters must form whole groups of four; '=' padding is accepted only in
+// the last one or two positions of a group, and nothing but whitespace may
+// follow it.
+//
+// src, len - input text and its length (no terminator required)
+// out_len  - required out; decoded byte count on success, 0 otherwise
+//
+// Returns the decoded buffer (release with cloudsync_memory_free), or NULL when
+// an argument is NULL, the input holds no significant characters, the input is
+// malformed, or the allocation fails.
+static unsigned char *network_base64_decode(const char *src, size_t len, size_t *out_len) {
+    if (src == NULL || out_len == NULL) return NULL;
+    *out_len = 0;
+
+    // Only non-whitespace characters count toward the multiple-of-four rule.
+    size_t significant = 0;
+    for (size_t pos = 0; pos < len; pos++) {
+        if (!isspace((unsigned char)src[pos])) significant++;
+    }
+    if (significant == 0 || (significant % 4) != 0) return NULL;
+
+    size_t capacity = (significant / 4) * 3;
+    unsigned char *decoded = cloudsync_memory_alloc((uint64_t)capacity);
+    if (decoded == NULL) return NULL;
+
+    const int pad = -2;  // stored in a group slot in place of a '=' character
+    int group[4];
+    int filled = 0;
+    size_t written = 0;
+    bool padding_started = false;
+    bool valid = true;
+
+    for (size_t pos = 0; pos < len; pos++) {
+        unsigned char ch = (unsigned char)src[pos];
+        if (isspace(ch)) continue;
+
+        int sextet = pad;
+        if (ch == '=') {
+            padding_started = true;
+        } else {
+            sextet = network_base64_value((char)ch);
+            // Reject characters outside the alphabet and any data after padding.
+            if (sextet < 0 || padding_started) {
+                valid = false;
+                break;
+            }
+        }
+        group[filled++] = sextet;
+        if (filled < 4) continue;
+        filled = 0;
+
+        // The first two slots must carry data, and padding may not be followed by data.
+        if (group[0] < 0 || group[1] < 0 || (group[2] == pad && group[3] != pad)) {
+            valid = false;
+            break;
+        }
+
+        decoded[written++] = (unsigned char)(((group[0] << 2) | (group[1] >> 4)) & 0xff);
+        if (group[2] >= 0) {
+            decoded[written++] = (unsigned char)((((group[1] & 0x0f) << 4) | (group[2] >> 2)) & 0xff);
+        }
+        if (group[3] >= 0) {
+            decoded[written++] = (unsigned char)((((group[2] & 0x03) << 6) | group[3]) & 0xff);
+        }
+    }
+
+    // A leftover partial group is as malformed as an explicit rejection above.
+    if (!valid || filled != 0) {
+        cloudsync_memory_free(decoded);
+        *out_len = 0;
+        return NULL;
+    }
+
+    *out_len = written;
+    return decoded;
+}
+
+// Applies a single chunk object taken from a check response.
+//
+// The chunk JSON carries its payload either by reference ("url", fetched and
+// applied by network_download_changes) or inline ("payload", base64 text that
+// is decoded here). "payload" is only looked up when "url" is absent.
+//
+// context         - SQL function context, used to raise SQL errors and passed to the helpers
+// chunk_json, chunk_json_len - JSON text of the chunk and its length
+// final_chunk     - true for the last chunk of a batch; selects the checkpoint passed to apply
+// pnrows          - optional out; reset to 0 on entry, then passed to the apply helpers
+// err_out         - optional out; passed to the helpers to receive apply-error text
+// pbytes          - optional out; reset to 0 on entry, set to the decoded size for inline
+//                   payloads or passed to network_download_changes for url payloads
+//
+// Returns the helper's result code. Returns SQLITE_ERROR when the chunk has
+// neither "url" nor "payload", or when the inline payload does not decode;
+// both of those are raised directly on context, not through err_out.
+static int network_apply_check_chunk(sqlite3_context *context, const char *chunk_json, size_t chunk_json_len,
+ bool final_chunk, int *pnrows, char **err_out, int64_t *pbytes) {
+ if (pnrows) *pnrows = 0;
+ if (pbytes) *pbytes = 0;
+
+ char *download_url = json_extract_string(chunk_json, chunk_json_len, "url");
+ char *inline_payload = download_url ? NULL : json_extract_string(chunk_json, chunk_json_len, "payload");
+ if (!download_url && !inline_payload) {
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: check chunk is missing url or payload.", -1);
+ return SQLITE_ERROR;
+ }
+
+ // A non-final chunk must never advance the receive cursor (see cloudsync.h):
+ // landing mid-db_version would let the next /check skip the unapplied
+ // remainder. Only the final chunk advances -- to the explicit watermark, or
+ // the legacy last-applied fallback when it is absent.
+ int64_t watermark = json_extract_int(chunk_json, chunk_json_len, "watermark", -1);
+ int64_t checkpoint_db_version;
+ if (!final_chunk) {
+ checkpoint_db_version = CLOUDSYNC_CHECKPOINT_NONE;
+ } else {
+ checkpoint_db_version = (watermark < 0) ? CLOUDSYNC_CHECKPOINT_LAST_APPLIED : watermark;
+ }
+ int64_t checkpoint_seq = 0;
+
+ int rc = SQLITE_OK;
+ if (download_url) {
+ rc = network_download_changes(context, download_url, pnrows, err_out,
+ checkpoint_db_version, checkpoint_seq, pbytes);
+ } else {
+ size_t decoded_size = 0;
+ unsigned char *decoded = network_base64_decode(inline_payload, strlen(inline_payload), &decoded_size);
+ // The apply helper takes an int length, so a decoded payload larger than INT_MAX is rejected too.
+ if (!decoded || decoded_size > INT_MAX) {
+ if (decoded) cloudsync_memory_free(decoded);
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: invalid inline payload in check response.", -1);
+ rc = SQLITE_ERROR;
+ } else {
+ if (pbytes) *pbytes = (int64_t)decoded_size;
+ rc = network_apply_payload_buffer(context, (const char *)decoded, (int)decoded_size, pnrows, err_out,
+ checkpoint_db_version, checkpoint_seq,
+ "cloudsync_network_receive_changes: inline payload apply failed");
+ cloudsync_memory_free(decoded);
+ }
+ }
+
+ #ifdef CLOUDSYNC_NETWORK_TRACE
+ // "chunked" reports whether the chunk carried a non-negative watermark.
+ fprintf(stderr,
+ "[cloudsync-network] check chunk transport=%s chunked=%d final=%d rc=%d\n",
+ download_url ? "url" : (inline_payload ? "inline" : "none"),
+ (watermark >= 0), final_chunk, rc);
+ #endif
+
+ // Both strings were allocated by json_extract_string; at most one is non-NULL.
+ if (download_url) cloudsync_memory_free(download_url);
+ if (inline_payload) cloudsync_memory_free(inline_payload);
+ return rc;
+}
+
+char *network_apply_json_payload(const char *transport_key, const char *transport_value,
+ int64_t db_version_min, int64_t db_version_max,
+ const char *batch_id, int chunk_index, bool is_final) {
+ if (!transport_key || !transport_value || !batch_id) return NULL;
char *escaped_value = json_escape_string(transport_value);
if (!escaped_value) return NULL;
- size_t requested = strlen(transport_key) + strlen(escaped_value) + 128;
+ size_t requested = strlen(transport_key) + strlen(escaped_value) + strlen(batch_id) + 192;
char *json_payload = cloudsync_memory_alloc((uint64_t)requested);
if (!json_payload) {
cloudsync_memory_free(escaped_value);
@@ -1270,15 +1515,142 @@ static char *network_apply_json_payload(const char *transport_key, const char *t
}
snprintf(json_payload, requested,
- "{\"%s\":\"%s\", \"dbVersionMin\":%d, \"dbVersionMax\":%d}",
- transport_key, escaped_value, db_version_min, db_version_max);
+ "{\"%s\":\"%s\", \"dbVersionMin\":%" PRId64 ", \"dbVersionMax\":%" PRId64
+ ", \"batchId\":\"%s\", \"chunkIndex\":%d, \"isFinal\":%s}",
+ transport_key, escaped_value, db_version_min, db_version_max,
+ batch_id, chunk_index, is_final ? "true" : "false");
cloudsync_memory_free(escaped_value);
return json_payload;
}
-static const char *network_compute_status(int64_t last_optimistic, int64_t last_confirmed,
- int gaps_size, int64_t local_version) {
+// Sends one payload chunk to the apply endpoint, choosing the transport by size.
+//
+// Chunks of at most CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE bytes are
+// base64-encoded and embedded in the apply request under "blob". Larger chunks
+// are uploaded first: an upload URL is requested from upload_endpoint, the raw
+// bytes are sent to it, and the apply request then references it under "url".
+//
+// context          - SQL function context; errors are reported on it
+// netdata          - network state supplying the endpoints and authentication
+// blob, blob_size  - chunk bytes; a NULL or non-positive-length chunk is rejected
+// db_version_min, db_version_max, batch_id, chunk_index, is_final
+//                  - forwarded to network_apply_json_payload to build the request body
+// res_out          - required out; zeroed on entry and filled with the apply
+//                    endpoint's result only when SQLITE_OK is returned
+//
+// Returns SQLITE_OK once the apply request has been issued; the endpoint's
+// result is not inspected here, so the caller must check *res_out. Any other
+// return value means an error has already been reported on context.
+// NOTE(review): the caller presumably releases *res_out with
+// network_result_cleanup -- confirm against the call site.
+static int network_send_payload_to_apply(sqlite3_context *context, network_data *netdata,
+ const void *blob, int blob_size,
+ int64_t db_version_min, int64_t db_version_max,
+ const char *batch_id, int chunk_index, bool is_final,
+ NETWORK_RESULT *res_out) {
+ memset(res_out, 0, sizeof(*res_out));
+ if (!blob || blob_size <= 0) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: invalid empty payload chunk.", -1);
+ return SQLITE_ERROR;
+ }
+
+ #ifdef CLOUDSYNC_NETWORK_TRACE
+ fprintf(stderr,
+ "[cloudsync-network] send_changes chunk_size=%d fast-lane:%s db_version_min=%" PRId64 " db_version_max=%" PRId64
+ " batch_id=%s chunk_index=%d is_final=%d\n",
+ blob_size,
+ blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE ? "true" : "false",
+ db_version_min,
+ db_version_max,
+ batch_id,
+ chunk_index,
+ is_final ? 1 : 0);
+ #endif
+
+ // Fast lane: small chunks travel inside the apply request itself.
+ if (blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE) {
+ char *blob_base64 = network_base64_encode((const unsigned char *)blob, (size_t)blob_size);
+ if (!blob_base64) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: unable to encode payload chunk.", -1);
+ sqlite3_result_error_code(context, SQLITE_NOMEM);
+ return SQLITE_NOMEM;
+ }
+
+ char *json_payload = network_apply_json_payload("blob", blob_base64, db_version_min, db_version_max,
+ batch_id, chunk_index, is_final);
+ cloudsync_memory_free(blob_base64);
+ if (!json_payload) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1);
+ sqlite3_result_error_code(context, SQLITE_NOMEM);
+ return SQLITE_NOMEM;
+ }
+
+ *res_out = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true,
+ json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
+ cloudsync_memory_free(json_payload);
+ return SQLITE_OK;
+ }
+
+ // Slow lane, step 1: ask the upload endpoint for a URL to upload to.
+ NETWORK_RESULT upload_res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false,
+ NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
+ if (upload_res.code != CLOUDSYNC_NETWORK_BUFFER) {
+ network_result_to_sqlite_error(context, upload_res, "cloudsync_network_send_changes unable to receive upload URL");
+ network_result_cleanup(&upload_res);
+ return SQLITE_ERROR;
+ }
+
+ char *s3_url = json_extract_string(upload_res.buffer, upload_res.blen, "url");
+ if (!s3_url) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: missing 'url' in upload response.", -1);
+ network_result_cleanup(&upload_res);
+ return SQLITE_ERROR;
+ }
+
+ // Slow lane, step 2: upload the raw chunk bytes to that URL.
+ bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size);
+ if (sent == false) {
+ cloudsync_memory_free(s3_url);
+ // NOTE(review): upload_res still holds the successful upload-URL response here, not the
+ // failed upload -- confirm the error built from it is the intended one.
+ network_result_to_sqlite_error(context, upload_res, "cloudsync_network_send_changes unable to upload payload chunk to remote host.");
+ network_result_cleanup(&upload_res);
+ return SQLITE_ERROR;
+ }
+
+ // Slow lane, step 3: tell the apply endpoint where the uploaded chunk lives.
+ char *json_payload = network_apply_json_payload("url", s3_url, db_version_min, db_version_max,
+ batch_id, chunk_index, is_final);
+ cloudsync_memory_free(s3_url);
+ if (!json_payload) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1);
+ sqlite3_result_error_code(context, SQLITE_NOMEM);
+ network_result_cleanup(&upload_res);
+ return SQLITE_NOMEM;
+ }
+
+ network_result_cleanup(&upload_res);
+ *res_out = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true,
+ json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
+ cloudsync_memory_free(json_payload);
+ return SQLITE_OK;
+}
+
+// Folds the sync-state fields of an endpoint response into the caller's
+// running state.
+//
+// Nothing is touched unless res is a CLOUDSYNC_NETWORK_BUFFER result with a
+// buffer. Each field is overwritten only when the response supplies it:
+//   lastOptimisticVersion / lastConfirmedVersion - when parsed as >= 0
+//   gaps                                         - when present as an array (its size)
+//   failures "apply" / "check"                   - when present; the previous string,
+//                                                  if any, is freed and replaced
+//
+// The output pointers are dereferenced without NULL checks.
+void network_sync_state_update_from_response(NETWORK_RESULT *res,
+                                             int64_t *last_optimistic_version,
+                                             int64_t *last_confirmed_version,
+                                             int *gaps_size,
+                                             char **apply_failure_json,
+                                             char **check_failure_json) {
+    if (res == NULL) return;
+    if (res->code != CLOUDSYNC_NETWORK_BUFFER || res->buffer == NULL) return;
+
+    // The newest valid value wins even when it is lower than the current one:
+    // the server can move these versions BACKWARD on a rollback when a later
+    // send chunk fails, and lastOptimisticVersion becomes the durable send
+    // checkpoint. Keeping the maximum would push the checkpoint past the
+    // rolled-back changes and silently drop them.
+    int64_t optimistic = json_extract_int(res->buffer, res->blen, "lastOptimisticVersion", -1);
+    if (optimistic >= 0) *last_optimistic_version = optimistic;
+
+    int64_t confirmed = json_extract_int(res->buffer, res->blen, "lastConfirmedVersion", -1);
+    if (confirmed >= 0) *last_confirmed_version = confirmed;
+
+    int gaps = json_extract_array_size(res->buffer, res->blen, "gaps");
+    if (gaps >= 0) *gaps_size = gaps;
+
+    // Both failure stages follow the same replace-if-present rule.
+    const char *stage_names[2] = { "apply", "check" };
+    char **stage_slots[2] = { apply_failure_json, check_failure_json };
+    for (int s = 0; s < 2; s++) {
+        char *failure = json_extract_failure_stage(res->buffer, res->blen, stage_names[s]);
+        if (failure == NULL) continue;
+        if (*stage_slots[s] != NULL) cloudsync_memory_free(*stage_slots[s]);
+        *stage_slots[s] = failure;
+    }
+
+    #ifdef CLOUDSYNC_NETWORK_TRACE
+    // Dump the raw response the fields above came from. The buffer is not
+    // guaranteed NUL-terminated, so the print is bounded by its length.
+    fprintf(stderr, "[cloudsync-network] sync_state response=%.*s\n", (int)res->blen, res->buffer);
+    #endif
+}
+
+const char *network_compute_status(int64_t last_optimistic, int64_t last_confirmed,
+ int gaps_size, int64_t local_version) {
if (last_optimistic < 0 || last_confirmed < 0) return "error";
if (gaps_size > 0 || last_optimistic < local_version) return "out-of-sync";
if (last_optimistic == last_confirmed) return "synced";
@@ -1326,6 +1698,8 @@ void cloudsync_network_has_unsent_changes (sqlite3_context *context, int argc, s
int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc, sqlite3_value **argv, sync_result *out) {
DEBUG_FUNCTION("cloudsync_network_send_changes");
+ UNUSED_PARAMETER(argc);
+ UNUSED_PARAMETER(argv);
// retrieve global context
cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
@@ -1333,125 +1707,111 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc,
network_data *netdata = (network_data *)cloudsync_auxdata(data);
if (!netdata) {sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); return SQLITE_ERROR;}
- // retrieve payload
- char *blob = NULL;
- int blob_size = 0, db_version = 0;
- int64_t new_db_version = 0;
- int rc = cloudsync_payload_get(data, &blob, &blob_size, &db_version, &new_db_version);
+ int64_t db_version = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION);
+ if (db_version < 0) {
+ sqlite3_result_error(context, "Unable to retrieve db_version.", -1);
+ return SQLITE_ERROR;
+ }
+
+ sqlite3 *db = sqlite3_context_db_handle(context);
+ sqlite3_stmt *stmt = NULL;
+ const char *chunk_sql =
+ "SELECT payload, payload_size, watermark_db_version, is_final "
+ "FROM cloudsync_payload_chunks WHERE since_db_version = ?";
+ int rc = sqlite3_prepare_v2(db, chunk_sql, -1, &stmt, NULL);
if (rc != SQLITE_OK) {
- if (db_version < 0) sqlite3_result_error(context, "Unable to retrieve db_version.", -1);
- else sqlite3_result_error(context, "Unable to retrieve changes in cloudsync_network_send_changes", -1);
+ sqlite3_result_error(context, sqlite3_errmsg(db), -1);
+ sqlite3_result_error_code(context, rc);
return rc;
}
-
- // Case 1: empty local db — no payload and no server state, skip network entirely
- if ((blob == NULL || blob_size == 0) && db_version == 0) {
- if (out) {
- out->server_version = 0;
- out->local_version = 0;
- out->status = network_compute_status(0, 0, 0, 0);
- }
- return SQLITE_OK;
- }
-
- NETWORK_RESULT res;
- if (blob != NULL && blob_size > 0) {
- int db_version_min = db_version+1;
- int db_version_max = (int)new_db_version;
- if (db_version_min > db_version_max) db_version_min = db_version_max;
-
- #ifdef CLOUDSYNC_NETWORK_TRACE
- fprintf(stderr,
- "[cloudsync-network] send_changes blob_size=%d fast-lane:%s\n",
- blob_size,
- blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE ? "true" : "false");
- #endif
-
- if (blob_size <= CLOUDSYNC_NETWORK_FAST_LANE_MAX_BLOB_SIZE) {
- char *blob_base64 = network_base64_encode((const unsigned char *)blob, (size_t)blob_size);
- cloudsync_memory_free(blob);
- if (!blob_base64) {
- sqlite3_result_error(context, "cloudsync_network_send_changes: unable to encode BLOB changes.", -1);
- sqlite3_result_error_code(context, SQLITE_NOMEM);
- return SQLITE_NOMEM;
- }
+ sqlite3_bind_int64(stmt, 1, db_version);
- char *json_payload = network_apply_json_payload("blob", blob_base64, db_version_min, db_version_max);
- cloudsync_memory_free(blob_base64);
- if (!json_payload) {
- sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1);
- sqlite3_result_error_code(context, SQLITE_NOMEM);
- return SQLITE_NOMEM;
- }
+ int64_t new_db_version = db_version;
+ int64_t last_optimistic_version = -1;
+ int64_t last_confirmed_version = -1;
+ int gaps_size = -1;
+ char *apply_failure_json = NULL;
+ char *check_failure_json = NULL;
+ bool sent_any = false;
+ int sent_chunks = 0; // payload chunks sent this call
+ int64_t sent_bytes = 0; // serialized payload bytes sent this call
+ // One send call = one all-or-nothing batch: every chunk announces the same
+ // global window [send_checkpoint+1 .. watermark] (holes from non-local-change
+ // db_versions included) plus batchId/chunkIndex/isFinal. The server advances
+ // optimistic on the final chunk and confirms the whole window only when every
+ // chunk of the batch applied; a failed batch is re-sent whole under a new id.
+ char batch_id[UUID_STR_MAXLEN];
+ cloudsync_uuid_v7_string(batch_id, true);
+
+ while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
+ const void *blob = sqlite3_column_blob(stmt, 0);
+ int blob_size = sqlite3_column_bytes(stmt, 0);
+ int64_t payload_size = sqlite3_column_int64(stmt, 1);
+ int64_t watermark = sqlite3_column_int64(stmt, 2);
+ bool is_final = sqlite3_column_int(stmt, 3) != 0;
+
+ if (!blob || blob_size <= 0 || payload_size != blob_size || payload_size > INT_MAX ||
+ watermark <= db_version) {
+ sqlite3_result_error(context, "cloudsync_network_send_changes: invalid payload chunk generated.", -1);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
- res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
- cloudsync_memory_free(json_payload);
- } else {
- // bulk lane: stage the payload through the upload endpoint and apply by URL
- res = network_receive_buffer(netdata, netdata->upload_endpoint, netdata->authentication, true, false, NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
- if (res.code != CLOUDSYNC_NETWORK_BUFFER) {
- cloudsync_memory_free(blob);
- network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to receive upload URL");
- network_result_cleanup(&res);
- return SQLITE_ERROR;
- }
+ NETWORK_RESULT res = {0};
+ rc = network_send_payload_to_apply(context, netdata, blob, blob_size, db_version + 1, watermark,
+ batch_id, sent_chunks, is_final, &res);
+ if (rc != SQLITE_OK) goto cleanup;
- char *s3_url = json_extract_string(res.buffer, res.blen, "url");
- if (!s3_url) {
- cloudsync_memory_free(blob);
- sqlite3_result_error(context, "cloudsync_network_send_changes: missing 'url' in upload response.", -1);
- network_result_cleanup(&res);
- return SQLITE_ERROR;
- }
- bool sent = network_send_buffer(netdata, s3_url, NULL, blob, blob_size);
- cloudsync_memory_free(blob);
- if (sent == false) {
- cloudsync_memory_free(s3_url);
- network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to upload BLOB changes to remote host.");
- network_result_cleanup(&res);
- return SQLITE_ERROR;
- }
+ if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) {
+ network_sync_state_update_from_response(&res, &last_optimistic_version, &last_confirmed_version, &gaps_size,
+ &apply_failure_json, &check_failure_json);
+ } else if (res.code != CLOUDSYNC_NETWORK_OK) {
+ network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host.");
+ network_result_cleanup(&res);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ network_result_cleanup(&res);
- char *json_payload = network_apply_json_payload("url", s3_url, db_version_min, db_version_max);
- cloudsync_memory_free(s3_url);
- if (!json_payload) {
- sqlite3_result_error(context, "cloudsync_network_send_changes: unable to allocate apply request payload.", -1);
- sqlite3_result_error_code(context, SQLITE_NOMEM);
- network_result_cleanup(&res);
- return SQLITE_NOMEM;
+ sent_any = true;
+ sent_chunks++;
+ sent_bytes += payload_size;
+ if (watermark > new_db_version) new_db_version = watermark;
+ }
+ if (rc != SQLITE_DONE) {
+ sqlite3_result_error(context, sqlite3_errmsg(db), -1);
+ sqlite3_result_error_code(context, rc);
+ goto cleanup;
+ }
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+
+ if (!sent_any) {
+ // Empty local db with no server state: preserve the previous fast no-op path.
+ if (db_version == 0) {
+ if (out) {
+ out->server_version = 0;
+ out->local_version = 0;
+ out->status = network_compute_status(0, 0, 0, 0);
}
+ rc = SQLITE_OK;
+ goto cleanup;
+ }
- // free res
+ NETWORK_RESULT res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false,
+ NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
+ if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) {
+ network_sync_state_update_from_response(&res, &last_optimistic_version, &last_confirmed_version, &gaps_size,
+ &apply_failure_json, &check_failure_json);
+ } else if (res.code != CLOUDSYNC_NETWORK_OK) {
+ network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host.");
network_result_cleanup(&res);
-
- // notify remote host that we successfully uploaded changes
- res = network_receive_buffer(netdata, netdata->apply_endpoint, netdata->authentication, true, true, json_payload, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
- cloudsync_memory_free(json_payload);
+ rc = SQLITE_ERROR;
+ goto cleanup;
}
- } else {
- // there is no data to send, just check the status to update the db_version value in settings and to reply the status
- new_db_version = db_version;
- res = network_receive_buffer(netdata, netdata->status_endpoint, netdata->authentication, true, false, NULL, cloudsync_default_headers, ARRAY_LEN(cloudsync_default_headers));
- }
-
- int64_t last_optimistic_version = -1;
- int64_t last_confirmed_version = -1;
- int gaps_size = -1;
- char *apply_failure_json = NULL;
- char *check_failure_json = NULL;
-
- if (res.code == CLOUDSYNC_NETWORK_BUFFER && res.buffer) {
- last_optimistic_version = json_extract_int(res.buffer, res.blen, "lastOptimisticVersion", -1);
- last_confirmed_version = json_extract_int(res.buffer, res.blen, "lastConfirmedVersion", -1);
- gaps_size = json_extract_array_size(res.buffer, res.blen, "gaps");
- if (gaps_size < 0) gaps_size = 0;
- apply_failure_json = json_extract_failure_stage(res.buffer, res.blen, "apply");
- check_failure_json = json_extract_failure_stage(res.buffer, res.blen, "check");
- } else if (res.code != CLOUDSYNC_NETWORK_OK) {
- network_result_to_sqlite_error(context, res, "cloudsync_network_send_changes unable to apply changes to remote host.");
network_result_cleanup(&res);
- return SQLITE_ERROR;
}
+ if (gaps_size < 0) gaps_size = 0;
// update db_version in settings
char buf[256];
@@ -1470,6 +1830,8 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc,
out->server_version = last_optimistic_version;
out->local_version = new_db_version;
out->status = network_compute_status(last_optimistic_version, last_confirmed_version, gaps_size, new_db_version);
+ out->send_chunks = sent_chunks;
+ out->send_bytes = sent_bytes;
out->apply_failure_json = apply_failure_json;
out->check_failure_json = check_failure_json;
apply_failure_json = NULL;
@@ -1477,9 +1839,13 @@ int cloudsync_network_send_changes_internal (sqlite3_context *context, int argc,
}
if (apply_failure_json) cloudsync_memory_free(apply_failure_json);
if (check_failure_json) cloudsync_memory_free(check_failure_json);
-
- network_result_cleanup(&res);
return SQLITE_OK;
+
+cleanup:
+ if (stmt) sqlite3_finalize(stmt);
+ if (apply_failure_json) cloudsync_memory_free(apply_failure_json);
+ if (check_failure_json) cloudsync_memory_free(check_failure_json);
+ return rc == SQLITE_DONE ? SQLITE_OK : rc;
}
void cloudsync_network_send_changes (sqlite3_context *context, int argc, sqlite3_value **argv) {
@@ -1498,22 +1864,24 @@ void cloudsync_network_send_changes (sqlite3_context *context, int argc, sqlite3
char *buf;
if (sr.apply_failure_json) {
buf = cloudsync_memory_mprintf(
- "{\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"lastFailure\":%s}}",
+ "{\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"chunks\":%d,\"bytes\":%lld,\"lastFailure\":%s}}",
sr.status ? sr.status : "error",
(long long)sr.local_version, (long long)sr.server_version,
+ sr.send_chunks, (long long)sr.send_bytes,
sr.apply_failure_json);
} else {
buf = cloudsync_memory_mprintf(
- "{\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld}}",
+ "{\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"chunks\":%d,\"bytes\":%lld}}",
sr.status ? sr.status : "error",
- (long long)sr.local_version, (long long)sr.server_version);
+ (long long)sr.local_version, (long long)sr.server_version,
+ sr.send_chunks, (long long)sr.send_bytes);
}
sqlite3_result_text(context, buf, -1, cloudsync_memory_free);
if (sr.apply_failure_json) cloudsync_memory_free(sr.apply_failure_json);
if (sr.check_failure_json) cloudsync_memory_free(sr.check_failure_json);
}
-int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync_result *out, char **err_out) {
+int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync_result *out, char **err_out, int request_max_chunks) {
cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
network_data *netdata = (network_data *)cloudsync_auxdata(data);
if (!netdata) {sqlite3_result_error(context, "Unable to retrieve CloudSync network context.", -1); return -1;}
@@ -1524,25 +1892,168 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync
int seq = dbutils_settings_get_int_value(data, CLOUDSYNC_KEY_CHECK_SEQ);
if (seq<0) {sqlite3_result_error(context, "Unable to retrieve seq.", -1); return -1;}
+ // Restart paging whenever the durable receive window changes: the page cursor
+ // is only meaningful within a single drain (check_dbversion held at "since").
+ if (netdata->check_cursor_since != db_version) {
+ netdata->check_cursor = 0;
+ netdata->check_cursor_since = db_version;
+ }
+
// Capture local db_version before download so we can query cloudsync_changes afterwards
int64_t prev_dbv = cloudsync_dbversion(data);
- char json_payload[2024];
- snprintf(json_payload, sizeof(json_payload), "{\"dbVersion\":%lld, \"seq\":%d}", (long long)db_version, seq);
+ // "cursor" is the spool page to serve. "maxChunks" lets cursor-spool servers
+ // batch several contiguous prepared pages in one response when present.
+ // Old/legacy servers ignore the unknown fields and omit them from the response;
+ // the client then never self-pages (check_cursor stays 0), preserving current behavior.
+ char json_payload[2048];
+ if (request_max_chunks > 0) {
+ snprintf(json_payload, sizeof(json_payload), "{\"dbVersion\":%lld, \"seq\":%d, \"cursor\":%lld, \"maxChunks\":%d}",
+ (long long)db_version, seq, (long long)netdata->check_cursor, request_max_chunks);
+ } else {
+ snprintf(json_payload, sizeof(json_payload), "{\"dbVersion\":%lld, \"seq\":%d, \"cursor\":%lld}",
+ (long long)db_version, seq, (long long)netdata->check_cursor);
+ }
+
+ if (out) { out->page_delivered = false; out->more_pending = false; out->bytes_received = 0; out->chunks_received = 0; }
NETWORK_RESULT result = network_receive_buffer(netdata, netdata->check_endpoint, netdata->authentication, true, true, json_payload, cloudsync_check_headers, ARRAY_LEN(cloudsync_check_headers));
int rc = SQLITE_OK;
if (result.code == CLOUDSYNC_NETWORK_BUFFER) {
- // The /check endpoint returns one of two shapes:
- // HTTP 200 → {"url": "..."} (artifact ready for download)
- // HTTP 202 → SyncStatusResponse (no artifact yet — status snapshot,
- // may include failures.check)
- // Branch on the presence of "url" rather than HTTP status; both shapes arrive as BUFFER.
- char *download_url = json_extract_string(result.buffer, result.blen, "url");
- if (download_url) {
- rc = network_download_changes(context, download_url, pnrows, err_out);
- cloudsync_memory_free(download_url);
+ // The /check endpoint returns one of these shapes:
+ // HTTP 200 -> {"url": "..."} (legacy artifact URL)
+ // HTTP 200 -> {"data":{"payload": "...", ...}} (single inline payload)
+ // HTTP 200 -> {"data":{"chunks":[...], ...}} (cursor-spool batch)
+ // HTTP 202 -> SyncStatusResponse (no artifact yet/status)
+ // Branch on the presence of transport fields rather than HTTP status; all
+ // shapes arrive as BUFFER. Newer servers wrap page metadata in "data";
+ // legacy responses put "url" at the top level.
+ char *data_json = json_extract_object_raw(result.buffer, result.blen, "data");
+ const char *check_json = data_json ? data_json : result.buffer;
+ size_t check_json_len = data_json ? strlen(data_json) : result.blen;
+
+ int rows_total = 0;
+ int chunks_total = 0;
+ int64_t bytes_total = 0;
+ bool delivered = false;
+ bool more_pending = false;
+ int64_t next_cursor = json_extract_int(check_json, check_json_len, "nextCursor", -1);
+
+ int ntokens = 0;
+ int chunks_index = -1;
+ jsmntok_t *tokens = json_parse_tokens_alloc(check_json, check_json_len, &ntokens);
+ bool has_chunks = tokens && tokens[0].type == JSMN_OBJECT &&
+ jsmn_find_object_value(check_json, tokens, ntokens, 0, "chunks", &chunks_index) &&
+ chunks_index >= 0 && chunks_index < ntokens &&
+ tokens[chunks_index].type == JSMN_ARRAY;
+
+ if (!tokens) {
+ // The body is BUFFER (non-empty) but unparseable (malformed, truncated,
+ // or token allocation failed). Without this, a chunks batch we failed to
+ // tokenize would fall through to the single-payload branch, find no
+ // url/payload, and be misreported as an empty "up to date" response,
+ // silently dropping the pending batch.
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: unable to parse check response.", -1);
+ rc = SQLITE_ERROR;
+ } else if (has_chunks && request_max_chunks > 0 && tokens[chunks_index].size > request_max_chunks) {
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: check response exceeded requested maxChunks.", -1);
+ rc = SQLITE_ERROR;
+ } else if (has_chunks && tokens[chunks_index].size > 0) {
+ bool final_batch = json_extract_bool(check_json, check_json_len, "final", true);
+ int64_t last_cursor = -1;
+ int child = chunks_index + 1;
+
+ for (int i = 0; rc == SQLITE_OK && i < tokens[chunks_index].size; i++) {
+ if (child >= ntokens || tokens[child].type != JSMN_OBJECT) {
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: invalid check chunks response.", -1);
+ rc = SQLITE_ERROR;
+ break;
+ }
+
+ const char *chunk_json = check_json + tokens[child].start;
+ size_t chunk_json_len = (size_t)(tokens[child].end - tokens[child].start);
+ bool final_chunk = final_batch && (i == tokens[chunks_index].size - 1);
+ int chunk_rows = 0;
+ int64_t chunk_bytes = 0;
+ rc = network_apply_check_chunk(context, chunk_json, chunk_json_len, final_chunk,
+ &chunk_rows, err_out, &chunk_bytes);
+ if (rc == SQLITE_OK) {
+ rows_total += chunk_rows;
+ bytes_total += chunk_bytes;
+ chunks_total++;
+ delivered = true;
+ last_cursor = json_extract_int(chunk_json, chunk_json_len, "cursor", last_cursor);
+ netdata->check_cursor = final_chunk ? 0 : last_cursor + 1;
+ }
+ child += jsmn_token_span(tokens, ntokens, child);
+ }
+
+ if (rc == SQLITE_OK) {
+ if (!final_batch && next_cursor < 0 && last_cursor >= 0) next_cursor = last_cursor + 1;
+ if (!final_batch && next_cursor < 0) {
+ // Non-final batch with no resumable cursor. The server contract
+ // always supplies nextCursor (or a per-chunk cursor) on a non-final
+ // batch, so a missing one would otherwise make us silently drop the
+ // rest of the stream and report a false "complete". Fail loudly.
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: non-final check batch missing next cursor.", -1);
+ rc = SQLITE_ERROR;
+ } else {
+ more_pending = !final_batch && next_cursor >= 0;
+ }
+ }
+ } else if (has_chunks) {
+ // Explicit empty cursor-spool batch: no artifact in flight.
+ netdata->check_cursor = 0;
+ if (pnrows) *pnrows = 0;
+ } else {
+ char *download_url = json_extract_string(check_json, check_json_len, "url");
+ char *inline_payload = download_url ? NULL : json_extract_string(check_json, check_json_len, "payload");
+ if (download_url || inline_payload) {
+ bool final_chunk = json_extract_bool(check_json, check_json_len, "final", true);
+ int chunk_rows = 0;
+ int64_t chunk_bytes = 0;
+ rc = network_apply_check_chunk(context, check_json, check_json_len, final_chunk,
+ &chunk_rows, err_out, &chunk_bytes);
+ if (rc == SQLITE_OK && !final_chunk && next_cursor < 0) {
+ // Symmetric with the chunks-array path: a non-final response
+ // with no resumable cursor would otherwise silently drop the
+ // rest of the stream and report a false "complete". Fail loudly.
+ sqlite3_result_error(context, "cloudsync_network_receive_changes: non-final check response missing next cursor.", -1);
+ rc = SQLITE_ERROR;
+ } else if (rc == SQLITE_OK) {
+ rows_total = chunk_rows;
+ bytes_total = chunk_bytes;
+ chunks_total = 1;
+ delivered = true;
+ more_pending = !final_chunk && next_cursor >= 0;
+ }
+ } else {
+ // 202 / "up to date": no artifact in flight -> reset paging.
+ netdata->check_cursor = 0;
+ if (pnrows) *pnrows = 0;
+ }
+ if (download_url) cloudsync_memory_free(download_url);
+ if (inline_payload) cloudsync_memory_free(inline_payload);
}
+
+ if (tokens) cloudsync_memory_free(tokens);
+
+ if (rc == SQLITE_OK && delivered) {
+ // Finalize cursor state after the returned batch is applied/staged.
+ // Batched responses advance the in-memory spool cursor after each
+ // successful chunk, so a later failure retries from the failed chunk.
+ netdata->check_cursor = more_pending ? next_cursor : 0;
+ if (pnrows) *pnrows = rows_total;
+ if (out) {
+ out->page_delivered = true;
+ out->more_pending = more_pending;
+ out->bytes_received = bytes_total;
+ out->chunks_received = chunks_total;
+ }
+ } else {
+ if (pnrows) *pnrows = 0;
+ }
+ if (data_json) cloudsync_memory_free(data_json);
// failures.check may appear in either shape; extract opportunistically.
if (out) {
char *check_failure = json_extract_failure_stage(result.buffer, result.blen, "check");
@@ -1561,8 +2072,10 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync
if (out && pnrows) out->rows_received = *pnrows;
- // Query cloudsync_changes for affected tables after successful download
- if (out && rc == SQLITE_OK && pnrows && *pnrows > 0) {
+ // Query cloudsync_changes for affected tables after successful download.
+ // Skipped when the caller defers it (a multi-chunk drain computes the union once
+ // at the end from a db_version captured before the whole drain started).
+ if (out && !out->defer_tables && rc == SQLITE_OK && pnrows && *pnrows > 0) {
sqlite3 *db = (sqlite3 *)cloudsync_db(data);
out->tables_json = network_get_affected_tables(db, prev_dbv);
}
@@ -1571,6 +2084,107 @@ int cloudsync_network_check_internal(sqlite3_context *context, int *pnrows, sync
return rc;
}
+// Aggregate outcome of one network_drain_changes() call; that function assigns every field once, just before it returns.
+typedef struct {
+ int rows; // cumulative rows applied across the drain (sum of the per-call row counts of delivered pages)
+ int chunks; // payload chunks applied this drain
+ int64_t bytes; // serialized payload bytes received this drain
+ bool complete; // true iff the receive stream is fully drained (nothing pending); starts true, cleared on error, permanent failure, or when more pages remain
+ bool check_permanent_failure; // server reported a non-retryable failures.check: polling stopped early
+ char *receive_err; // owned by the caller; client-side apply error, or NULL
+} drain_result;
+
+// Drains chunked /check responses into the local database by repeatedly calling
+// cloudsync_network_check_internal(). Delivered pages are fetched back-to-back with no
+// delay; wait_ms (sleep per poll) and max_retries (poll budget) are spent only on polls
+// that deliver nothing (HTTP 202 / up to date). max_chunks > 0 caps the chunks applied
+// this call; max_chunks <= 0 drains all available, up to CLOUDSYNC_CHECK_MAX_CHUNKS_PER_DRAIN.
+// The in-memory page cursor persists on the network context, so a capped drain resumes on the
+// next call. Fills *dr and, when rows were applied without error, sr->tables_json; returns rc.
+static int network_drain_changes (sqlite3_context *context, sync_result *sr,
+ int wait_ms, int max_retries, int max_chunks,
+ drain_result *dr) {
+ cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
+ sqlite3 *db = (sqlite3 *)cloudsync_db(data);
+ // Capture the local db_version once before draining so the affected-tables union
+ // across all drained chunks can be computed with a single query at the end.
+ int64_t drain_prev_dbv = cloudsync_dbversion(data);
+ sr->defer_tables = true; // makes cloudsync_network_check_internal() skip its own per-call tables query
+
+ int ntries = 0; // counts only "nothing ready" (202) polls
+ int nrows_total = 0; // cumulative rows applied across the whole drain
+ int nchunks = 0; // payload chunks applied this call
+ int64_t bytes_total = 0; // serialized payload bytes received this call
+ bool complete = true; // false iff the stream is known to have more pending
+ bool check_permanent_failure = false; // server reported a non-retryable failures.check
+ char *receive_err = NULL; // err_out of the latest check call; handed to dr->receive_err on return
+ int rc = SQLITE_OK;
+ for (;;) {
+ if (receive_err) { cloudsync_memory_free(receive_err); receive_err = NULL; } // drop the previous poll's error text
+
+ int request_max_chunks = 0; // 0 => the check request is sent without a "maxChunks" field
+ if (max_chunks > 0) {
+ int remaining = max_chunks - nchunks; // caller budget not yet consumed
+ if (remaining <= 0) break;
+ request_max_chunks = remaining;
+ }
+ int safety_remaining = CLOUDSYNC_CHECK_MAX_CHUNKS_PER_DRAIN - nchunks;
+ if (safety_remaining <= 0) { complete = false; break; } // safety bound exhausted: report incomplete
+ if (request_max_chunks > 0) {
+ if (safety_remaining < request_max_chunks) request_max_chunks = safety_remaining;
+ } else if (nchunks > 0) { // uncapped drain: only follow-up requests are bounded (the first one sends no maxChunks)
+ request_max_chunks = safety_remaining;
+ }
+
+ int nrows = 0;
+ rc = cloudsync_network_check_internal(context, &nrows, sr, &receive_err, request_max_chunks);
+ // a receive error (network or apply) won't fix itself across retries
+ if (rc != SQLITE_OK) { complete = false; break; }
+
+ if (sr->page_delivered) {
+ nrows_total += nrows; // a staged (incomplete) fragment contributes 0
+ bytes_total += sr->bytes_received;
+ nchunks += sr->chunks_received;
+ complete = !sr->more_pending; // reflects whether the stream is finished
+ if (!sr->more_pending) break; // final batch -> drained
+ if (max_chunks > 0 && nchunks >= max_chunks) break; // caller cap: more pending
+ if (nchunks >= CLOUDSYNC_CHECK_MAX_CHUNKS_PER_DRAIN) break; // safety bound: more pending
+ continue; // keep draining immediately
+ }
+
+ // A non-retryable server-side check failure (e.g. a permission/authorization
+ // error) won't clear by waiting: stop draining now instead of polling out the
+ // remaining retries. The failure object stays in sr->check_failure_json so the
+ // caller can surface it (receive.lastFailure / a raised error).
+ if (sr->check_failure_json &&
+ !json_extract_bool(sr->check_failure_json, strlen(sr->check_failure_json), "retryable", true)) {
+ check_permanent_failure = true;
+ complete = false;
+ break;
+ }
+
+ // nothing delivered (202 / up to date): preserve the polling-for-changes semantics.
+ // complete is left as-is (true if no page was ever delivered; false if the last
+ // delivered page was non-final), so a 202 after partial pages reports incomplete.
+ if (ntries + 1 >= max_retries) break; // empty-poll budget spent, counting this poll
+ ntries++;
+ sqlite3_sleep(wait_ms);
+ }
+
+ // Compute the affected-tables union once, over the whole drain window.
+ if (!receive_err && rc == SQLITE_OK && nrows_total > 0) {
+ sr->tables_json = network_get_affected_tables(db, drain_prev_dbv);
+ }
+
+ dr->rows = nrows_total;
+ dr->chunks = nchunks;
+ dr->bytes = bytes_total;
+ dr->complete = complete;
+ dr->check_permanent_failure = check_permanent_failure;
+ dr->receive_err = receive_err; // caller now owns (and must free) the error string
+ return rc;
+}
+
void cloudsync_network_sync (sqlite3_context *context, int wait_ms, int max_retries) {
sync_result sr = {.server_version = -1};
int rc = cloudsync_network_send_changes_internal(context, 0, NULL, &sr);
@@ -1580,19 +2194,11 @@ void cloudsync_network_sync (sqlite3_context *context, int wait_ms, int max_retr
return;
}
- int ntries = 0;
- int nrows = 0;
- char *receive_err = NULL;
- while (ntries < max_retries) {
- if (ntries > 0) sqlite3_sleep(wait_ms);
- if (sr.tables_json) { cloudsync_memory_free(sr.tables_json); sr.tables_json = NULL; }
- if (receive_err) { cloudsync_memory_free(receive_err); receive_err = NULL; }
- rc = cloudsync_network_check_internal(context, &nrows, &sr, &receive_err);
- // a receive error (network or apply) won't fix itself across retries
- if (rc != SQLITE_OK) break;
- if (nrows > 0) break;
- ntries++;
- }
+ // Drain the whole receive stream in one call (max_chunks=0 => unlimited).
+ drain_result dr = {0};
+ rc = network_drain_changes(context, &sr, wait_ms, max_retries, 0, &dr);
+ char *receive_err = dr.receive_err;
+ int nrows_total = dr.rows;
// If the receive phase failed, still emit structured JSON so the caller
// sees that the send phase completed and understands why receive did not.
@@ -1601,11 +2207,12 @@ void cloudsync_network_sync (sqlite3_context *context, int wait_ms, int max_retr
}
if (receive_err) {
rc = SQLITE_OK;
- nrows = 0;
+ nrows_total = 0;
if (sr.tables_json) { cloudsync_memory_free(sr.tables_json); sr.tables_json = NULL; }
}
const char *tables = sr.tables_json ? sr.tables_json : "[]";
+ const char *complete_str = dr.complete ? "true" : "false";
const char *status = sr.status ? sr.status : "error";
char *escaped_err = receive_err ? json_escape_string(receive_err) : NULL;
@@ -1613,29 +2220,29 @@ void cloudsync_network_sync (sqlite3_context *context, int wait_ms, int max_retr
// across optional fields (send.lastFailure, receive.error, receive.lastFailure).
char *send_part = sr.apply_failure_json
? cloudsync_memory_mprintf(
- "\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"lastFailure\":%s}",
- status, (long long)sr.local_version, (long long)sr.server_version, sr.apply_failure_json)
+ "\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"chunks\":%d,\"bytes\":%lld,\"lastFailure\":%s}",
+ status, (long long)sr.local_version, (long long)sr.server_version, sr.send_chunks, (long long)sr.send_bytes, sr.apply_failure_json)
: cloudsync_memory_mprintf(
- "\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld}",
- status, (long long)sr.local_version, (long long)sr.server_version);
+ "\"send\":{\"status\":\"%s\",\"localVersion\":%lld,\"serverVersion\":%lld,\"chunks\":%d,\"bytes\":%lld}",
+ status, (long long)sr.local_version, (long long)sr.server_version, sr.send_chunks, (long long)sr.send_bytes);
char *recv_part;
if (escaped_err && sr.check_failure_json) {
recv_part = cloudsync_memory_mprintf(
- "\"receive\":{\"rows\":%d,\"tables\":%s,\"error\":\"%s\",\"lastFailure\":%s}",
- nrows, tables, escaped_err, sr.check_failure_json);
+ "\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"error\":\"%s\",\"lastFailure\":%s}",
+ nrows_total, tables, dr.chunks, (long long)dr.bytes, complete_str, escaped_err, sr.check_failure_json);
} else if (escaped_err) {
recv_part = cloudsync_memory_mprintf(
- "\"receive\":{\"rows\":%d,\"tables\":%s,\"error\":\"%s\"}",
- nrows, tables, escaped_err);
+ "\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"error\":\"%s\"}",
+ nrows_total, tables, dr.chunks, (long long)dr.bytes, complete_str, escaped_err);
} else if (sr.check_failure_json) {
recv_part = cloudsync_memory_mprintf(
- "\"receive\":{\"rows\":%d,\"tables\":%s,\"lastFailure\":%s}",
- nrows, tables, sr.check_failure_json);
+ "\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"lastFailure\":%s}",
+ nrows_total, tables, dr.chunks, (long long)dr.bytes, complete_str, sr.check_failure_json);
} else {
recv_part = cloudsync_memory_mprintf(
- "\"receive\":{\"rows\":%d,\"tables\":%s}",
- nrows, tables);
+ "\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s}",
+ nrows_total, tables, dr.chunks, (long long)dr.bytes, complete_str);
}
char *buf = cloudsync_memory_mprintf("{%s,%s}", send_part, recv_part);
@@ -1667,15 +2274,20 @@ void cloudsync_network_sync2 (sqlite3_context *context, int argc, sqlite3_value
}
-void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite3_value **argv) {
- DEBUG_FUNCTION("cloudsync_network_check_changes");
-
+// Shared implementation for cloudsync_network_receive_changes() and its deprecated
+// alias cloudsync_network_check_changes(). Drains available chunks (no polling-wait
+// for server preparation: wait_ms=0, max_retries=1) and emits a receive-only JSON
+// block. max_chunks <= 0 drains everything available; max_chunks > 0 caps the chunks
+// applied this call so a caller can drive progress/traffic — the in-memory page
+// cursor persists on the network context, so the next call resumes the drain.
+static void network_receive_changes_impl (sqlite3_context *context, int max_chunks) {
// check-scoped: emits receive.error (client-side apply) and/or
// receive.lastFailure (server-side failures.check) only — never a send block.
sync_result sr = {.server_version = -1};
- char *receive_err = NULL;
- int nrows = 0;
- int rc = cloudsync_network_check_internal(context, &nrows, &sr, &receive_err);
+ drain_result dr = {0};
+ int rc = network_drain_changes(context, &sr, 0, 1, max_chunks, &dr);
+ char *receive_err = dr.receive_err;
+ int nrows = dr.rows;
// Endpoint/network errors already raised a SQL error on the context
if (rc != SQLITE_OK && !receive_err) {
@@ -1684,6 +2296,28 @@ void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite
return;
}
+ // A non-retryable server-side check failure is a permanent configuration/
+ // authorization problem, not a transient "nothing ready yet": raise a SQL error so
+ // a polling caller fails fast instead of looping until it times out. (receive is a
+ // pure receive op with no send block to preserve, so a raised error is unambiguous.)
+ if (dr.check_permanent_failure && sr.check_failure_json) {
+ char *code = json_extract_string(sr.check_failure_json, strlen(sr.check_failure_json), "code");
+ char *message = json_extract_string(sr.check_failure_json, strlen(sr.check_failure_json), "message");
+ char *err = cloudsync_memory_mprintf(
+ "cloudsync_network_receive_changes: server rejected check (non-retryable): %s%s%s",
+ code ? code : "check failed",
+ message ? " - " : "",
+ message ? message : "");
+ sqlite3_result_error(context, err ? err : "cloudsync_network_receive_changes: server rejected check (non-retryable).", -1);
+ if (err) cloudsync_memory_free(err);
+ if (code) cloudsync_memory_free(code);
+ if (message) cloudsync_memory_free(message);
+ if (receive_err) cloudsync_memory_free(receive_err);
+ if (sr.tables_json) cloudsync_memory_free(sr.tables_json);
+ if (sr.check_failure_json) cloudsync_memory_free(sr.check_failure_json);
+ return;
+ }
+
// Apply errors → structured JSON with receive.error
if (receive_err) {
nrows = 0;
@@ -1691,19 +2325,21 @@ void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite
}
const char *tables = sr.tables_json ? sr.tables_json : "[]";
+ const char *complete_str = dr.complete ? "true" : "false";
char *escaped = receive_err ? json_escape_string(receive_err) : NULL;
char *buf;
if (escaped && sr.check_failure_json) {
- buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"error\":\"%s\",\"lastFailure\":%s}}",
- nrows, tables, escaped, sr.check_failure_json);
+ buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"error\":\"%s\",\"lastFailure\":%s}}",
+ nrows, tables, dr.chunks, (long long)dr.bytes, complete_str, escaped, sr.check_failure_json);
} else if (escaped) {
- buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"error\":\"%s\"}}",
- nrows, tables, escaped);
+ buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"error\":\"%s\"}}",
+ nrows, tables, dr.chunks, (long long)dr.bytes, complete_str, escaped);
} else if (sr.check_failure_json) {
- buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"lastFailure\":%s}}",
- nrows, tables, sr.check_failure_json);
+ buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s,\"lastFailure\":%s}}",
+ nrows, tables, dr.chunks, (long long)dr.bytes, complete_str, sr.check_failure_json);
} else {
- buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s}}", nrows, tables);
+ buf = cloudsync_memory_mprintf("{\"receive\":{\"rows\":%d,\"tables\":%s,\"chunks\":%d,\"bytes\":%lld,\"complete\":%s}}",
+ nrows, tables, dr.chunks, (long long)dr.bytes, complete_str);
}
sqlite3_result_text(context, buf, -1, cloudsync_memory_free);
if (escaped) cloudsync_memory_free(escaped);
@@ -1712,6 +2348,30 @@ void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite
if (sr.check_failure_json) cloudsync_memory_free(sr.check_failure_json);
}
+void cloudsync_network_receive_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { // SQL entry point for the zero-argument cloudsync_network_receive_changes()
+ DEBUG_FUNCTION("cloudsync_network_receive_changes");
+ network_receive_changes_impl(context, 0); // max_chunks = 0: no cap, drain every chunk currently available
+}
+
+void cloudsync_network_receive_changes1 (sqlite3_context *context, int argc, sqlite3_value **argv) { // SQL entry point for the one-argument cloudsync_network_receive_changes(max_chunks)
+ DEBUG_FUNCTION("cloudsync_network_receive_changes");
+ int max_chunks = sqlite3_value_int(argv[0]); // argv[0] is always present: this callback is registered with nArg = 1
+ network_receive_changes_impl(context, max_chunks); // > 0 caps the chunks applied by this call; <= 0 drains everything available
+}
+
+// Deprecated: use cloudsync_network_receive_changes(). Kept as a thin alias for
+// backward compatibility; the name will be removed in a future major version.
+void cloudsync_network_check_changes (sqlite3_context *context, int argc, sqlite3_value **argv) { // zero-argument form of the deprecated alias
+ DEBUG_FUNCTION("cloudsync_network_check_changes");
+ network_receive_changes_impl(context, 0); // identical call to cloudsync_network_receive_changes(): uncapped drain
+}
+
+void cloudsync_network_check_changes1 (sqlite3_context *context, int argc, sqlite3_value **argv) { // one-argument form of the deprecated cloudsync_network_check_changes alias
+ DEBUG_FUNCTION("cloudsync_network_check_changes");
+ int max_chunks = sqlite3_value_int(argv[0]); // argv[0] is always present: this callback is registered with nArg = 1
+ network_receive_changes_impl(context, max_chunks); // identical call to cloudsync_network_receive_changes1: capped drain when max_chunks > 0
+}
+
void cloudsync_network_reset_sync_version (sqlite3_context *context, int argc, sqlite3_value **argv) {
DEBUG_FUNCTION("cloudsync_network_reset_sync_version");
@@ -1851,9 +2511,19 @@ int cloudsync_network_register (sqlite3 *db, char **pzErrMsg, void *ctx) {
rc = sqlite3_create_function(db, "cloudsync_network_send_changes", 0, DEFAULT_FLAGS, ctx, cloudsync_network_send_changes, NULL, NULL);
if (rc != SQLITE_OK) return rc;
+ rc = sqlite3_create_function(db, "cloudsync_network_receive_changes", 0, DEFAULT_FLAGS, ctx, cloudsync_network_receive_changes, NULL, NULL);
+ if (rc != SQLITE_OK) return rc;
+
+ rc = sqlite3_create_function(db, "cloudsync_network_receive_changes", 1, DEFAULT_FLAGS, ctx, cloudsync_network_receive_changes1, NULL, NULL);
+ if (rc != SQLITE_OK) return rc;
+
+ // Deprecated alias of cloudsync_network_receive_changes(); kept for backward compatibility.
rc = sqlite3_create_function(db, "cloudsync_network_check_changes", 0, DEFAULT_FLAGS, ctx, cloudsync_network_check_changes, NULL, NULL);
if (rc != SQLITE_OK) return rc;
-
+
+ rc = sqlite3_create_function(db, "cloudsync_network_check_changes", 1, DEFAULT_FLAGS, ctx, cloudsync_network_check_changes1, NULL, NULL);
+ if (rc != SQLITE_OK) return rc;
+
rc = sqlite3_create_function(db, "cloudsync_network_sync", 0, DEFAULT_FLAGS, ctx, cloudsync_network_sync0, NULL, NULL);
if (rc != SQLITE_OK) return rc;
diff --git a/src/network/network_private.h b/src/network/network_private.h
index dae47742..21a46fd4 100644
--- a/src/network/network_private.h
+++ b/src/network/network_private.h
@@ -8,6 +8,9 @@
#ifndef __CLOUDSYNC_NETWORK_PRIVATE__
#define __CLOUDSYNC_NETWORK_PRIVATE__
+#include
+#include
+
#define CLOUDSYNC_DEFAULT_ADDRESS "https://cloudsync.sqlite.ai"
#define CLOUDSYNC_ENDPOINT_PREFIX "v2/cloudsync/databases"
#define CLOUDSYNC_ENDPOINT_UPLOAD "upload"
@@ -20,7 +23,7 @@
#define CLOUDSYNC_HEADER_TICKET_EXPIRES_AT "X-CloudSync-Ticket-Expires-At"
// CLOUDSYNC_VERSION is defined in cloudsync.h — include it before this header at use sites.
#define CLOUDSYNC_HEADER_VERSION_LINE CLOUDSYNC_HEADER_VERSION ": " CLOUDSYNC_VERSION
-#define CLOUDSYNC_HEADER_CHECK_CAPABILITIES "X-CloudSync-Capabilities: check-status-response"
+#define CLOUDSYNC_HEADER_CHECK_CAPABILITIES "X-CloudSync-Capabilities: check-status-response, check-chunks"
#define CLOUDSYNC_NETWORK_OK 1
#define CLOUDSYNC_NETWORK_ERROR 2
@@ -46,6 +49,11 @@ bool network_data_set_endpoints (network_data *data, char *auth, char *check, ch
bool network_send_buffer(network_data *data, const char *endpoint, const char *authentication, const void *blob, int blob_size);
NETWORK_RESULT network_receive_buffer (network_data *data, const char *endpoint, const char *authentication, bool zero_terminated, bool is_post_request, char *json_payload, const char **extra_headers, int nextra_headers);
+// Exposed (non-static) for the network unit test; otherwise internal to network.c.
+void network_sync_state_update_from_response(NETWORK_RESULT *res, int64_t *last_optimistic_version, int64_t *last_confirmed_version, int *gaps_size, char **apply_failure_json, char **check_failure_json);
+const char *network_compute_status(int64_t last_optimistic, int64_t last_confirmed, int gaps_size, int64_t local_version);
+char *network_apply_json_payload(const char *transport_key, const char *transport_value, int64_t db_version_min, int64_t db_version_max, const char *batch_id, int chunk_index, bool is_final);
+
#ifdef CLOUDSYNC_NETWORK_TRACE
const char *network_trace_endpoint_name(network_data *data, const char *endpoint);
const char *network_trace_result_name(int code);
diff --git a/src/pk.c b/src/pk.c
index 97a66390..dcc8ca67 100644
--- a/src/pk.c
+++ b/src/pk.c
@@ -423,6 +423,56 @@ size_t pk_encode_data (char *buffer, size_t bseek, char *data, size_t datalen) {
memcpy(buffer + bseek, data, datalen);
return bseek + datalen;
}
+
+// Returns the number of bytes one value occupies in the pk_encode format: one
+// type byte plus the payload. `type` is a DBTYPE_* constant. For DBTYPE_INTEGER
+// `len_or_value` is the integer value itself; for DBTYPE_TEXT / DBTYPE_BLOB it is
+// the byte length of the data; it is not read for DBTYPE_FLOAT / DBTYPE_NULL.
+// Returns SIZE_MAX for an unknown type, a negative length, or a length whose
+// encoded size does not fit in size_t.
+size_t pk_encode_raw_size (int type, int64_t len_or_value) {
+    switch (type) {
+        case DBTYPE_INTEGER: {
+            // INT64_MIN cannot be negated (signed overflow); it is encoded as a
+            // bare type byte with no payload, see pk_encode_raw_int.
+            if (len_or_value == INT64_MIN) return 1;
+            if (len_or_value < 0) len_or_value = -len_or_value;
+            return 1 + pk_encode_nbytes_needed(len_or_value);
+        }
+        case DBTYPE_FLOAT:
+            return 1 + sizeof(uint64_t);
+        case DBTYPE_TEXT:
+        case DBTYPE_BLOB: {
+            if (len_or_value < 0) return SIZE_MAX;
+            size_t nbytes = pk_encode_nbytes_needed(len_or_value);
+            // Where size_t is narrower than int64_t the cast below would silently
+            // truncate and under-report the size; report "too large" instead.
+            if ((uint64_t)len_or_value > (uint64_t)SIZE_MAX - 1u - (uint64_t)nbytes) return SIZE_MAX;
+            return 1 + nbytes + (size_t)len_or_value;
+        }
+        case DBTYPE_NULL:
+            return 1;
+        default:
+            break;
+    }
+    return SIZE_MAX;
+}
+
+// Writes the encoding of `value` at the start of `buffer` (a type byte that also
+// carries the payload width, followed by the magnitude) and returns the offset
+// handed back by the last pk_encode_* helper. The caller must provide a buffer
+// of at least pk_encode_raw_size(DBTYPE_INTEGER, value) bytes.
+size_t pk_encode_raw_int (char *buffer, int64_t value) {
+    // INT64_MIN cannot be negated, so it gets a dedicated type byte and no payload.
+    if (value == INT64_MIN) return pk_encode_u8(buffer, 0, DATABASE_TYPE_MAX_NEGATIVE_INTEGER);
+
+    const bool negative = (value < 0);
+    const int tag = negative ? DATABASE_TYPE_NEGATIVE_INTEGER : DBTYPE_INTEGER;
+    const int64_t magnitude = negative ? -value : value;
+
+    const size_t width = pk_encode_nbytes_needed(magnitude);
+    const uint8_t header = (uint8_t)((width << 3) | tag);
+    const size_t offset = pk_encode_u8(buffer, 0, header);
+    return pk_encode_uint64(buffer, offset, (uint64_t)magnitude, width);
+}
+
+// Writes a DBTYPE_TEXT value at the start of `buffer`: type byte (carrying the
+// width of the length field), the length, then `len` bytes copied from `value`.
+// Returns the offset just past the copied data.
+size_t pk_encode_raw_text (char *buffer, const char *value, size_t len) {
+    const size_t len_width = pk_encode_nbytes_needed((int64_t)len);
+    size_t offset = 0;
+    offset = pk_encode_u8(buffer, offset, (uint8_t)((len_width << 3) | DBTYPE_TEXT));
+    offset = pk_encode_uint64(buffer, offset, (uint64_t)len, len_width);
+    return pk_encode_data(buffer, offset, (char *)value, len);
+}
+
+// Writes a DBTYPE_BLOB value at the start of `buffer`: type byte (carrying the
+// width of the length field), the length, then `len` bytes copied from `value`.
+// Returns the offset just past the copied data.
+size_t pk_encode_raw_blob (char *buffer, const void *value, size_t len) {
+    const size_t len_width = pk_encode_nbytes_needed((int64_t)len);
+    size_t offset = 0;
+    offset = pk_encode_u8(buffer, offset, (uint8_t)((len_width << 3) | DBTYPE_BLOB));
+    offset = pk_encode_uint64(buffer, offset, (uint64_t)len, len_width);
+    return pk_encode_data(buffer, offset, (char *)value, len);
+}
char *pk_encode (dbvalue_t **argv, int argc, char *b, bool is_prikey, size_t *bsize, int skip_idx) {
size_t bseek = 0;
diff --git a/src/pk.h b/src/pk.h
index ea9a3901..ab61b9f7 100644
--- a/src/pk.h
+++ b/src/pk.h
@@ -26,5 +26,9 @@ int pk_decode_bind_callback (void *xdata, int index, int type, int64_t ival,
int pk_decode_print_callback (void *xdata, int index, int type, int64_t ival, double dval, char *pval);
size_t pk_encode_size (dbvalue_t **argv, int argc, int reserved, int skip_idx);
uint64_t pk_checksum (const char *buffer, size_t blen);
+size_t pk_encode_raw_size (int type, int64_t len_or_value);
+size_t pk_encode_raw_int (char *buffer, int64_t value);
+size_t pk_encode_raw_text (char *buffer, const char *value, size_t len);
+size_t pk_encode_raw_blob (char *buffer, const void *value, size_t len);
#endif
diff --git a/src/postgresql/cloudsync.sql.in b/src/postgresql/cloudsync.sql.in
index edfa4d39..92bb6f42 100644
--- a/src/postgresql/cloudsync.sql.in
+++ b/src/postgresql/cloudsync.sql.in
@@ -149,6 +149,55 @@ CREATE OR REPLACE AGGREGATE cloudsync_payload_encode(text, bytea, text, bytea, b
FINALFUNC = cloudsync_payload_encode_finalfn
);
+CREATE OR REPLACE FUNCTION cloudsync_payload_chunks( -- set-returning function: one result row per payload chunk
+ since_db_version bigint DEFAULT NULL, -- NULL: the C code falls back to the stored CLOUDSYNC_KEY_SEND_DBVERSION setting
+ filter_site_id bytea DEFAULT NULL, -- NULL (with exclude_filter_site_id = false): the local site id is used
+ until_db_version bigint DEFAULT NULL, -- NULL or 0: the C code uses the current MAX(db_version) of the matching changes as the watermark
+ exclude_filter_site_id boolean DEFAULT false, -- true: all sites except filter_site_id; raises an error when filter_site_id is NULL
+ resume_db_version bigint DEFAULT NULL, -- non-NULL: positional resume, the scan starts at (resume_db_version, resume_seq) inclusive instead of since_db_version
+ resume_seq bigint DEFAULT NULL, -- NULL is treated as 0
+ resume_frag_offset bigint DEFAULT NULL -- > 0 (positional resume only): resume inside the first row's value at this byte offset
+)
+RETURNS TABLE (
+ payload bytea,
+ chunk_index bigint,
+ payload_size bigint,
+ rows bigint,
+ db_version_min bigint,
+ db_version_max bigint,
+ watermark_db_version bigint,
+ next_db_version bigint,
+ next_seq bigint,
+ next_frag_offset bigint,
+ is_final boolean
+)
+AS 'MODULE_PATHNAME', 'cloudsync_payload_chunks'
+LANGUAGE C VOLATILE; -- not declared STRICT: the C function checks each argument for NULL itself
+
+CREATE OR REPLACE FUNCTION cloudsync_payload_blob_checked( -- no DEFAULTs: all five arguments must be supplied by the caller
+ since_db_version bigint, -- NOTE(review): presumably forms a last-applied cursor together with since_seq -- confirm against the C implementation
+ since_seq bigint, -- NOTE(review): a comment in cloudsync_payload_chunks says this cursor is exclusive (seq >) -- confirm
+ filter_site_id bytea,
+ exclude_filter_site_id boolean,
+ max_estimated_payload_size bigint -- NOTE(review): units and the behavior when the limit is exceeded are not visible here -- confirm
+)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'cloudsync_payload_blob_checked'
+LANGUAGE C VOLATILE;
+
+-- UUID binary <-> canonical string helpers (the changes table stores site_id as
+-- the 16-byte binary UUID; these let string-based callers such as the /check
+-- endpoint pass a site_id to cloudsync_payload_chunks).
+CREATE OR REPLACE FUNCTION cloudsync_uuid_text(uuid bytea, dash_format boolean DEFAULT true) -- NULL uuid returns NULL; a uuid that is not exactly 16 bytes raises an error
+RETURNS text
+AS 'MODULE_PATHNAME', 'cloudsync_uuid_text'
+LANGUAGE C IMMUTABLE; -- not STRICT: the C function handles NULL arguments itself (NULL dash_format is treated as true)
+
+CREATE OR REPLACE FUNCTION cloudsync_uuid_blob(uuid text) -- NULL returns NULL; a malformed UUID string raises an error
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'cloudsync_uuid_blob'
+LANGUAGE C IMMUTABLE; -- not STRICT: the C function checks for a NULL argument itself
+
-- Payload decoding and application
CREATE OR REPLACE FUNCTION cloudsync_payload_decode(payload bytea)
RETURNS integer
diff --git a/src/postgresql/cloudsync_postgresql.c b/src/postgresql/cloudsync_postgresql.c
index 4d0ed6af..3f62d2f6 100644
--- a/src/postgresql/cloudsync_postgresql.c
+++ b/src/postgresql/cloudsync_postgresql.c
@@ -194,10 +194,41 @@ Datum cloudsync_uuid (PG_FUNCTION_ARGS) {
// Parse into PostgreSQL UUID type
Datum uuid_datum = DirectFunctionCall1(uuid_in, CStringGetDatum(uuid_str));
-
+
PG_RETURN_DATUM(uuid_datum);
}
+// cloudsync_uuid_text(bytea [, dash_format]): render a binary UUID as its
+// canonical string. A NULL uuid yields NULL and a NULL dash_format is treated as
+// true. Input that is not exactly UUID_LEN bytes long raises
+// ERRCODE_INVALID_PARAMETER_VALUE.
+PG_FUNCTION_INFO_V1(cloudsync_uuid_text);
+Datum cloudsync_uuid_text (PG_FUNCTION_ARGS) {
+    if (PG_ARGISNULL(0)) {
+        PG_RETURN_NULL();
+    }
+
+    bytea *raw = PG_GETARG_BYTEA_PP(0);
+    if (VARSIZE_ANY_EXHDR(raw) != UUID_LEN) {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("cloudsync_uuid_text: expected a 16-byte value")));
+    }
+
+    bool with_dashes = true;
+    if (!PG_ARGISNULL(1)) with_dashes = PG_GETARG_BOOL(1);
+
+    char text_buf[UUID_STR_MAXLEN];
+    cloudsync_uuid_v7_stringify((uint8_t *)VARDATA_ANY(raw), text_buf, with_dashes);
+    PG_RETURN_TEXT_P(cstring_to_text(text_buf));
+}
+
+// cloudsync_uuid_blob(text): parse a UUID string (dashed/undashed) into its
+// UUID_LEN-byte binary value. A NULL input yields NULL; a string rejected by
+// cloudsync_uuid_v7_parse raises ERRCODE_INVALID_PARAMETER_VALUE.
+PG_FUNCTION_INFO_V1(cloudsync_uuid_blob);
+Datum cloudsync_uuid_blob (PG_FUNCTION_ARGS) {
+    if (PG_ARGISNULL(0)) {
+        PG_RETURN_NULL();
+    }
+
+    text *input = PG_GETARG_TEXT_PP(0);
+    uint8_t parsed[UUID_LEN];
+    int parse_rc = cloudsync_uuid_v7_parse(VARDATA_ANY(input), (int)VARSIZE_ANY_EXHDR(input), parsed);
+    if (parse_rc != 0) {
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("cloudsync_uuid_blob: malformed UUID string")));
+    }
+
+    // Build a plain (4-byte header) varlena holding exactly the UUID bytes.
+    bytea *out = (bytea *)palloc(VARHDRSZ + UUID_LEN);
+    SET_VARSIZE(out, VARHDRSZ + UUID_LEN);
+    memcpy(VARDATA(out), parsed, UUID_LEN);
+    PG_RETURN_BYTEA_P(out);
+}
+
// cloudsync_db_version() - Get current database version
PG_FUNCTION_INFO_V1(cloudsync_db_version);
Datum cloudsync_db_version (PG_FUNCTION_ARGS) {
@@ -1002,6 +1033,668 @@ Datum cloudsync_payload_encode_finalfn (PG_FUNCTION_ARGS) {
PG_RETURN_BYTEA_P(result);
}
+typedef struct { // per-scan state of cloudsync_payload_chunks, kept in funcctx->user_fctx between SRF calls
+ Portal portal; // SPI cursor over the change rows; fetched one row at a time
+ TupleDesc outdesc; // blessed tuple descriptor of the function's composite result
+ SPITupleTable *current_tuptable; // tuple table of the buffered row; released by payload_chunks_free_current
+ // Context for the per-row fields below that must outlive a single SRF call:
+ // a single oversized value emits its fragments over multiple SRF_PERCALL
+ // invocations and re-reads tbl/pk/col_name/col_value/site_id on each. The
+ // SRF sets this to funcctx->multi_call_memory_ctx (the only context the SRF
+ // protocol guarantees survives between calls). Non-SRF callers leave it NULL
+ // and allocate in the current context, freeing each row as they iterate.
+ MemoryContext value_ctx;
+ bool spi_connected; // set once SPI_connect() has succeeded
+ bool has_current; // true while a fetched row is buffered in the fields below
+ bool eof; // set once the cursor returned no more rows
+ int64 chunk_index; // starts at 0 on the first SRF call
+ int64 watermark; // upper db_version bound of the scan (the resolved until_db_version)
+ int max_size; // value of cloudsync_payload_max_chunk_size()
+ int frag_target; // bytes of value data carried by one fragment
+
+ char *tbl; // buffered row, column 1 ("" when the column is NULL)
+ bytea *pk; // column 2, private copy; NULL when the column is NULL
+ char *col_name; // column 3 ("" when the column is NULL)
+ bytea *col_value; // column 4, not copied; NULL when the column is NULL
+ bool col_value_owned; // true when col_value is a separate detoasted copy that must be pfree'd
+ int64 col_version; // column 5 (0 when NULL)
+ int64 db_version; // column 6 (0 when NULL)
+ bytea *site_id; // column 7, private copy; NULL when the column is NULL
+ int64 cl; // column 8 (0 when NULL)
+ int64 seq; // column 9 (0 when NULL)
+
+ bool frag_active; // true while fragments of the buffered value remain to be emitted
+ int frag_part; // index of the next fragment to emit
+ int frag_count; // total number of fragments planned for the value
+ int64 frag_offset; // byte offset into col_value of the next fragment
+ int64 frag_total; // total byte length of col_value's data
+ uint64 frag_checksum; // pk_checksum over the whole value, passed with every fragment
+} PayloadChunksState;
+
+// Releases everything held for the buffered row (copied strings and byteas plus
+// the SPI tuple table) and marks the state as having no current row.
+// A NULL `st` is accepted and ignored.
+static void payload_chunks_free_current(PayloadChunksState *st) {
+    if (st == NULL) return;
+
+    if (st->tbl) { pfree(st->tbl); st->tbl = NULL; }
+    if (st->pk) { pfree(st->pk); st->pk = NULL; }
+    if (st->col_name) { pfree(st->col_name); st->col_name = NULL; }
+
+    // col_value is freed only when it is a separate detoasted copy; otherwise
+    // just the pointer is dropped.
+    if (st->col_value && st->col_value_owned) pfree(st->col_value);
+    st->col_value = NULL;
+    st->col_value_owned = false;
+
+    if (st->site_id) { pfree(st->site_id); st->site_id = NULL; }
+
+    if (st->current_tuptable) {
+        SPI_freetuptable(st->current_tuptable);
+        st->current_tuptable = NULL;
+    }
+
+    st->has_current = false;
+}
+
+static bool payload_chunks_fetch_current(PayloadChunksState *st) { // ensures one row is buffered in st; returns false once the cursor is exhausted
+ if (st->has_current) return true; // a row is already buffered: nothing to fetch
+ if (st->eof) return false;
+ SPI_cursor_fetch(st->portal, true, 1); // forward fetch of exactly one row
+ if (SPI_processed == 0) {
+ if (SPI_tuptable) { SPI_freetuptable(SPI_tuptable); SPI_tuptable = NULL; }
+ st->eof = true; // remembered so later calls return false without touching the cursor
+ return false;
+ }
+
+ st->current_tuptable = SPI_tuptable; // kept until payload_chunks_free_current; an unowned col_value still points into this tuple
+ HeapTuple tup = SPI_tuptable->vals[0];
+ TupleDesc td = SPI_tuptable->tupdesc;
+ bool isnull = false;
+ Datum d;
+
+ // These fields are re-read on later SRF calls while emitting fragments of a
+ // single oversized value, so they must be allocated in a context that
+ // survives between calls (value_ctx == multi_call_memory_ctx for the SRF).
+ // Includes any bytea detoasted by DatumGetByteaPP below. Non-SRF callers
+ // (value_ctx == NULL) keep the prior per-call allocation behavior.
+ MemoryContext old_value_ctx = st->value_ctx ? MemoryContextSwitchTo(st->value_ctx) : NULL;
+
+ d = SPI_getbinval(tup, td, 1, &isnull); // column 1: tbl
+ st->tbl = isnull ? pstrdup("") : text_to_cstring(DatumGetTextPP(d)); // a NULL column becomes an empty string
+ d = SPI_getbinval(tup, td, 2, &isnull); // column 2: pk
+ if (!isnull) {
+ bytea *b = DatumGetByteaPP(d);
+ size_t n = VARSIZE_ANY(b); // full varlena size, header included
+ st->pk = (bytea *)palloc(n);
+ memcpy(st->pk, b, n);
+ // DatumGetByteaPP returns a fresh copy when the datum was toasted; free
+ // it after the memcpy so a scan with toasted pks does not retain one
+ // detoast temp per row in value_ctx until the SRF ends.
+ if ((Pointer) b != DatumGetPointer(d)) pfree(b);
+ }
+ d = SPI_getbinval(tup, td, 3, &isnull); // column 3: col_name
+ st->col_name = isnull ? pstrdup("") : text_to_cstring(DatumGetTextPP(d));
+ d = SPI_getbinval(tup, td, 4, &isnull); // column 4: col_value
+ if (!isnull) {
+ bytea *b = DatumGetByteaPP(d);
+ st->col_value = b; // not copied, unlike pk and site_id
+ st->col_value_owned = ((Pointer) b != DatumGetPointer(d)); // owned only when detoasting produced a separate copy
+ }
+ d = SPI_getbinval(tup, td, 5, &isnull); st->col_version = isnull ? 0 : DatumGetInt64(d);
+ d = SPI_getbinval(tup, td, 6, &isnull); st->db_version = isnull ? 0 : DatumGetInt64(d);
+ d = SPI_getbinval(tup, td, 7, &isnull); // column 7: site_id
+ if (!isnull) {
+ bytea *b = DatumGetByteaPP(d);
+ size_t n = VARSIZE_ANY(b);
+ st->site_id = (bytea *)palloc(n);
+ memcpy(st->site_id, b, n);
+ if ((Pointer) b != DatumGetPointer(d)) pfree(b); // drop the detoast temp, same as for pk
+ }
+ d = SPI_getbinval(tup, td, 8, &isnull); st->cl = isnull ? 0 : DatumGetInt64(d);
+ d = SPI_getbinval(tup, td, 9, &isnull); st->seq = isnull ? 0 : DatumGetInt64(d);
+
+ if (old_value_ctx) MemoryContextSwitchTo(old_value_ctx); // restore the caller's context only if it was switched above
+
+ SPI_tuptable = NULL; // the table is now tracked through st->current_tuptable
+ st->has_current = true;
+ return true;
+}
+
+static void payload_chunks_make_pgvalues(PayloadChunksState *st, pgvalue_t **vals, text **owned_texts) { // wraps the buffered row as 9 pgvalue_t, in the cursor's SELECT column order
+ owned_texts[0] = cstring_to_text(st->tbl); // fresh text copies of the C strings; released by payload_chunks_free_pgvalues
+ owned_texts[1] = cstring_to_text(st->col_name);
+ vals[0] = pgvalue_create(PointerGetDatum(owned_texts[0]), TEXTOID, -1, InvalidOid, false); // tbl
+ vals[1] = pgvalue_create(PointerGetDatum(st->pk), BYTEAOID, -1, InvalidOid, false); // pk
+ vals[2] = pgvalue_create(PointerGetDatum(owned_texts[1]), TEXTOID, -1, InvalidOid, false); // col_name
+ vals[3] = pgvalue_create(PointerGetDatum(st->col_value), BYTEAOID, -1, InvalidOid, false); // col_value
+ vals[4] = pgvalue_create(Int64GetDatum(st->col_version), INT8OID, -1, InvalidOid, false); // col_version
+ vals[5] = pgvalue_create(Int64GetDatum(st->db_version), INT8OID, -1, InvalidOid, false); // db_version
+ vals[6] = pgvalue_create(PointerGetDatum(st->site_id), BYTEAOID, -1, InvalidOid, false); // site_id
+ vals[7] = pgvalue_create(Int64GetDatum(st->cl), INT8OID, -1, InvalidOid, false); // cl
+ vals[8] = pgvalue_create(Int64GetDatum(st->seq), INT8OID, -1, InvalidOid, false); // seq
+}
+
+// Frees the nine pgvalue_t wrappers and the two text copies produced by
+// payload_chunks_make_pgvalues. NULL slots are skipped.
+static void payload_chunks_free_pgvalues(pgvalue_t **vals, text **owned_texts) {
+    for (int slot = 0; slot < 9; ++slot) {
+        if (vals[slot] != NULL) pgvalue_free(vals[slot]);
+    }
+    for (int t = 0; t < 2; ++t) {
+        if (owned_texts[t] != NULL) pfree(owned_texts[t]);
+    }
+}
+
+static bytea *payload_chunks_emit_pg_fragment(PayloadChunksState *st, cloudsync_context *data,
+ int64 *rows, int64 *dbv_min, int64 *dbv_max) { // emits the next fragment of the buffered oversized value as its own payload; returns a palloc'd bytea
+ int64 remaining = st->frag_total - st->frag_offset;
+ int frag_len = remaining > st->frag_target ? st->frag_target : (int)remaining; // the last fragment may be shorter than frag_target
+ if (frag_len <= 0) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("invalid payload fragment size"))); // also reached when the offset is at or past the end of the value
+ const char *src = VARDATA_ANY(st->col_value) + st->frag_offset;
+
+ cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL)); // fresh context: each fragment is a payload of its own
+ if (!payload) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+ int rc = cloudsync_payload_encode_fragment_step(payload, data,
+ st->tbl, -1,
+ VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk),
+ st->col_name, -1,
+ src, frag_len,
+ st->col_version, st->db_version,
+ VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id),
+ st->cl, st->seq,
+ st->frag_checksum, st->frag_total, st->frag_part, st->frag_count);
+ if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+ rc = cloudsync_payload_encode_final(payload, data);
+ if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+ int64 blob_size = 0;
+ char *blob = cloudsync_payload_blob(payload, &blob_size, rows); // also fills *rows
+ bytea *result = (bytea *)palloc(VARHDRSZ + blob_size); // copy into a palloc'd varlena so the encoder's buffer can be freed below
+ SET_VARSIZE(result, VARHDRSZ + blob_size);
+ memcpy(VARDATA(result), blob, blob_size);
+ cloudsync_memory_free(blob);
+ cloudsync_memory_free(payload);
+
+ *dbv_min = st->db_version; // a fragment chunk covers exactly one row, hence one db_version
+ *dbv_max = st->db_version;
+ st->frag_offset += frag_len;
+ st->frag_part++;
+ if (st->frag_part >= st->frag_count) { // last fragment emitted: release the row so the next call fetches the following one
+ st->frag_active = false;
+ payload_chunks_free_current(st);
+ }
+ return result;
+}
+
+// Set up fragment state for the currently-fetched oversized value so
+// emit_pg_fragment can stream it. start_offset is the byte offset within the value
+// to resume from (0 when first reaching it; >0 when a positional cursor resumes
+// mid-value). frag_part is derived from the offset so a streamed and a resumed
+// fragment carry the same part index. The plan (frag_target/frag_count) is a
+// deterministic function of the row, so a resumed fragment tiles identically.
+//
+// Raises an ERROR (does not return) when the row cannot be fragmented: a NULL
+// pk, col_value or site_id, fragment metadata that does not fit in a chunk, or a
+// value that needs too many fragments.
+static void payload_chunks_pg_begin_fragment(PayloadChunksState *st, cloudsync_context *data, int64 start_offset) {
+    // fetch_current leaves these pointers NULL for SQL-NULL columns, and the
+    // VARDATA_ANY / VARSIZE_ANY_EXHDR calls below would dereference them.
+    if (!st->pk || !st->col_value || !st->site_id) {
+        ereport(ERROR, (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("cannot fragment a payload row with a NULL pk, col_value or site_id")));
+    }
+
+    st->frag_total = VARSIZE_ANY_EXHDR(st->col_value);
+    st->frag_offset = start_offset;
+
+    // Initial estimate, computed for part 0 of 1.
+    st->frag_target = cloudsync_payload_fragment_data_size(data,
+        st->tbl, -1,
+        VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk),
+        st->col_name, -1,
+        st->col_version, st->db_version,
+        VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id),
+        st->cl, st->seq,
+        st->frag_total, 0, 1);
+    if (st->frag_target <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg(CLOUDSYNC_ERRCODE_CHUNK_TOO_LARGE "payload fragment metadata exceeds max chunk size")));
+
+    // Re-plan with the last part index (count - 1 of count) until the per-fragment
+    // data size stops changing, bounded by the fixpoint iteration limit.
+    for (int i = 0; i < CLOUDSYNC_PAYLOAD_FRAGMENT_SIZE_FIXPOINT_ITERATIONS; ++i) {
+        int count = cloudsync_payload_fragment_count(st->frag_total, st->frag_target);
+        if (count <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg(CLOUDSYNC_ERRCODE_CHUNK_TOO_LARGE "payload requires too many fragments")));
+        int planned = cloudsync_payload_fragment_data_size(data,
+            st->tbl, -1,
+            VARDATA_ANY(st->pk), VARSIZE_ANY_EXHDR(st->pk),
+            st->col_name, -1,
+            st->col_version, st->db_version,
+            VARDATA_ANY(st->site_id), VARSIZE_ANY_EXHDR(st->site_id),
+            st->cl, st->seq,
+            st->frag_total, count - 1, count);
+        if (planned <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg(CLOUDSYNC_ERRCODE_CHUNK_TOO_LARGE "payload fragment metadata exceeds max chunk size")));
+        if (planned == st->frag_target) break;
+        st->frag_target = planned;
+    }
+
+    st->frag_count = cloudsync_payload_fragment_count(st->frag_total, st->frag_target);
+    // Same condition as inside the loop, so it carries the same error-code prefix.
+    if (st->frag_count <= 0) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg(CLOUDSYNC_ERRCODE_CHUNK_TOO_LARGE "payload requires too many fragments")));
+    st->frag_part = (st->frag_target > 0) ? (int)(start_offset / st->frag_target) : 0;
+    // The checksum covers the whole value, not just the part after start_offset.
+    st->frag_checksum = pk_checksum(VARDATA_ANY(st->col_value), (size_t)st->frag_total);
+    st->frag_active = true;
+}
+
+static bytea *payload_chunks_build_pg_next(PayloadChunksState *st, cloudsync_context *data,
+ int64 *rows, int64 *dbv_min, int64 *dbv_max) { // builds the next chunk; returns a palloc'd bytea, or NULL when no rows remain
+ *rows = *dbv_min = *dbv_max = 0;
+ if (st->frag_active) return payload_chunks_emit_pg_fragment(st, data, rows, dbv_min, dbv_max); // keep streaming an oversized value started earlier
+ if (!payload_chunks_fetch_current(st)) return NULL; // cursor exhausted before anything was allocated
+
+ size_t header_size = 0;
+ cloudsync_payload_context_size(&header_size);
+ cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+ if (!payload) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+
+ while (payload_chunks_fetch_current(st)) { // returns immediately when a row is still buffered from the previous iteration or call
+ size_t row_size = 0;
+ {
+ pgvalue_t *vals[9] = {0};
+ text *owned_texts[2] = {0};
+ payload_chunks_make_pgvalues(st, vals, owned_texts);
+ row_size = pk_encode_size((dbvalue_t **)vals, 9, 0, 3); // reserved = 0, skip_idx = 3 (see the pk_encode_size prototype); NOTE(review): skip_idx semantics are not visible here
+ payload_chunks_free_pgvalues(vals, owned_texts);
+ }
+ if (row_size == SIZE_MAX) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg(CLOUDSYNC_ERRCODE_ROW_TOO_LARGE "payload row too large")));
+
+ if ((int64)row_size + (int64)header_size + CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN > st->max_size) { // this row alone cannot fit in one chunk
+ if (cloudsync_payload_context_nrows(payload) > 0) break; // flush the rows batched so far; the oversized row stays buffered for the next call
+ payload_chunks_pg_begin_fragment(st, data, 0);
+ cloudsync_memory_free(payload); // the empty batch context is unused: fragments allocate their own
+ return payload_chunks_emit_pg_fragment(st, data, rows, dbv_min, dbv_max);
+ }
+
+ if (cloudsync_payload_context_nrows(payload) > 0 && cloudsync_payload_context_bused(payload) + row_size > (size_t)st->max_size) break; // chunk would overflow: close it, the row stays buffered
+
+ pgvalue_t *vals[9] = {0};
+ text *owned_texts[2] = {0};
+ payload_chunks_make_pgvalues(st, vals, owned_texts);
+ int rc = cloudsync_payload_encode_step(payload, data, 9, (dbvalue_t **)vals);
+ payload_chunks_free_pgvalues(vals, owned_texts);
+ if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+
+ if (cloudsync_payload_context_nrows(payload) == 1) *dbv_min = st->db_version; // the first encoded row sets the minimum
+ *dbv_max = st->db_version; // the last encoded row sets the maximum
+ payload_chunks_free_current(st); // row consumed: the next loop test fetches a new one
+ }
+
+ if (cloudsync_payload_context_nrows(payload) == 0) { // nothing was encoded: no chunk to return
+ cloudsync_memory_free(payload);
+ return NULL;
+ }
+ int rc = cloudsync_payload_encode_final(payload, data);
+ if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+ int64 blob_size = 0;
+ char *blob = cloudsync_payload_blob(payload, &blob_size, rows); // also fills *rows
+ bytea *result = (bytea *)palloc(VARHDRSZ + blob_size); // copy into a palloc'd varlena so the encoder's buffer can be freed below
+ SET_VARSIZE(result, VARHDRSZ + blob_size);
+ memcpy(VARDATA(result), blob, blob_size);
+ cloudsync_memory_free(blob);
+ cloudsync_memory_free(payload);
+ return result;
+}
+
+PG_FUNCTION_INFO_V1(cloudsync_payload_chunks);
+Datum cloudsync_payload_chunks(PG_FUNCTION_ARGS) {
+ FuncCallContext *funcctx;
+ cloudsync_context *data = get_cloudsync_context();
+
+ if (SRF_IS_FIRSTCALL()) {
+ funcctx = SRF_FIRSTCALL_INIT();
+ MemoryContext oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ PayloadChunksState *st = palloc0(sizeof(*st));
+ st->chunk_index = 0;
+ // Per-row fields that span multiple SRF_PERCALL calls (fragment emission)
+ // must be allocated here, not in the transient per-call context.
+ st->value_ctx = funcctx->multi_call_memory_ctx;
+
+ if (SPI_connect() != SPI_OK_CONNECT) ereport(ERROR, (errmsg("SPI_connect failed")));
+ st->spi_connected = true;
+ st->max_size = cloudsync_payload_max_chunk_size(data);
+ size_t header_size_tmp = 0;
+ cloudsync_payload_context_size(&header_size_tmp);
+ st->frag_target = st->max_size - (int)header_size_tmp - CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN;
+ if (st->frag_target < 1024) st->frag_target = 1024;
+
+ int64 since = PG_ARGISNULL(0) ? dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION) : PG_GETARG_INT64(0);
+ bytea *site_id = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_PP(1);
+ bool exclude = PG_ARGISNULL(3) ? false : PG_GETARG_BOOL(3);
+ // Positional resume cursor: when resume_db_version is given the scan starts
+ // at (resume_db_version, resume_seq) inclusive and the first chunk resumes a
+ // mid-value fragment at resume_frag_offset, instead of replaying from `since`.
+ // Lets the /check job page one chunk per round-trip with an O(1) seek and no
+ // spool table.
+ bool positional = !PG_ARGISNULL(4);
+ int64 resume_dbv = PG_ARGISNULL(4) ? 0 : PG_GETARG_INT64(4);
+ int64 resume_seq = PG_ARGISNULL(5) ? 0 : PG_GETARG_INT64(5);
+ int64 resume_frag = PG_ARGISNULL(6) ? 0 : PG_GETARG_INT64(6);
+ // Site filter resolution:
+ // exclude=true -> all sites except filter_site_id (CHECK path); site required
+ // filter given -> only that site
+ // default -> local site (send path, unchanged)
+ if (exclude && !site_id) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cloudsync_payload_chunks: exclude_filter_site_id requires a non-NULL filter_site_id")));
+ }
+ if (!exclude && !site_id) {
+ site_id = (bytea *)palloc(VARHDRSZ + UUID_LEN);
+ SET_VARSIZE(site_id, VARHDRSZ + UUID_LEN);
+ memcpy(VARDATA(site_id), cloudsync_siteid(data), UUID_LEN);
+ }
+
+ int64 until = PG_ARGISNULL(2) ? 0 : PG_GETARG_INT64(2);
+ if (until == 0) {
+ Oid mt[1] = {BYTEAOID};
+ Datum mv[1] = {PointerGetDatum(site_id)};
+ char mn[1] = {' '};
+ const char *mxq = exclude
+ ? "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,NULL) WHERE site_id <> $1"
+ : "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,$1)";
+ int mrc = SPI_execute_with_args(mxq, 1, mt, mv, mn, true, 1);
+ if (mrc == SPI_OK_SELECT && SPI_processed > 0) {
+ bool isnull = false;
+ Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+ until = isnull ? 0 : DatumGetInt64(d);
+ }
+ if (SPI_tuptable) { SPI_freetuptable(SPI_tuptable); SPI_tuptable = NULL; }
+ }
+ st->watermark = until;
+
+ StringInfoData q;
+ initStringInfo(&q);
+ if (positional) {
+ // Inclusive positional lower bound (db_version, seq) >= (resume_dbv,
+ // resume_seq) within db_version <= until. $1=site, $2=until, $3=resume_dbv,
+ // $4=resume_seq. (seq >= matches the SQLite vtab's exact tiling; contrast
+ // with payload_blob_checked's exclusive seq > for its last-applied cursor.)
+ if (exclude) {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,NULL) "
+ "WHERE site_id <> $1 AND db_version <= $2 AND (db_version > $3 OR (db_version = $3 AND seq >= $4)) "
+ "ORDER BY db_version, seq ASC");
+ } else {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,$1) "
+ "WHERE db_version <= $2 AND (db_version > $3 OR (db_version = $3 AND seq >= $4)) "
+ "ORDER BY db_version, seq ASC");
+ }
+ Oid argtypes[4] = {BYTEAOID, INT8OID, INT8OID, INT8OID};
+ Datum values[4] = {PointerGetDatum(site_id), Int64GetDatum(until), Int64GetDatum(resume_dbv), Int64GetDatum(resume_seq)};
+ char nulls[4] = {' ', ' ', ' ', ' '};
+ st->portal = SPI_cursor_open_with_args(NULL, q.data, 4, argtypes, values, nulls, true, 0);
+ } else {
+ if (exclude) {
+ // $1=since (into changes_select), $2=site to exclude, $3=until watermark
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select($1,NULL) WHERE site_id <> $2 AND db_version <= $3 ORDER BY db_version, seq ASC");
+ } else {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select($1,$2) WHERE db_version <= $3 ORDER BY db_version, seq ASC");
+ }
+ Oid argtypes[3] = {INT8OID, BYTEAOID, INT8OID};
+ Datum values[3] = {Int64GetDatum(since), PointerGetDatum(site_id), Int64GetDatum(until)};
+ char nulls[3] = {' ', ' ', ' '};
+ st->portal = SPI_cursor_open_with_args(NULL, q.data, 3, argtypes, values, nulls, true, 0);
+ }
+ pfree(q.data);
+ if (!st->portal) ereport(ERROR, (errmsg("SPI_cursor_open failed")));
+
+ // Resuming inside a value that was fragmented across chunks: the first row is
+ // that value; re-establish the fragment plan and skip to resume_frag.
+ if (positional && resume_frag > 0 && payload_chunks_fetch_current(st)) {
+ payload_chunks_pg_begin_fragment(st, data, resume_frag);
+ }
+
+ TupleDesc outdesc;
+ if (get_call_result_type(fcinfo, NULL, &outdesc) != TYPEFUNC_COMPOSITE) ereport(ERROR, (errmsg("return type must be composite")));
+ st->outdesc = BlessTupleDesc(outdesc);
+ funcctx->user_fctx = st;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ PayloadChunksState *st = (PayloadChunksState *)funcctx->user_fctx;
+
+ int64 rows = 0, dbv_min = 0, dbv_max = 0;
+ bytea *payload = payload_chunks_build_pg_next(st, data, &rows, &dbv_min, &dbv_max);
+ if (!payload) {
+ if (st->portal) SPI_cursor_close(st->portal);
+ st->portal = NULL;
+ if (st->spi_connected) SPI_finish();
+ st->spi_connected = false;
+ payload_chunks_free_current(st);
+ MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
+ SRF_RETURN_DONE(funcctx);
+ }
+
+ // Resume point a stateless caller passes back to continue after this chunk.
+ // frag_active -> same value, next byte offset; otherwise peek the next row
+ // (buffered for the following build call): a row -> its (db_version, seq);
+ // end of stream -> this was the final chunk.
+ int64 next_dbv, next_seq, next_frag;
+ bool is_final;
+ if (st->frag_active) {
+ next_dbv = st->db_version; next_seq = st->seq; next_frag = st->frag_offset; is_final = false;
+ } else if (payload_chunks_fetch_current(st)) {
+ next_dbv = st->db_version; next_seq = st->seq; next_frag = 0; is_final = false;
+ } else {
+ next_dbv = st->watermark; next_seq = 0; next_frag = 0; is_final = true;
+ }
+
+ Datum outvals[11];
+ bool outnulls[11] = {false,false,false,false,false,false,false,false,false,false,false};
+ outvals[0] = PointerGetDatum(payload);
+ outvals[1] = Int64GetDatum(st->chunk_index++);
+ outvals[2] = Int64GetDatum(VARSIZE_ANY_EXHDR(payload));
+ outvals[3] = Int64GetDatum(rows);
+ outvals[4] = Int64GetDatum(dbv_min);
+ outvals[5] = Int64GetDatum(dbv_max);
+ outvals[6] = Int64GetDatum(st->watermark);
+ outvals[7] = Int64GetDatum(next_dbv);
+ outvals[8] = Int64GetDatum(next_seq);
+ outvals[9] = Int64GetDatum(next_frag);
+ outvals[10] = BoolGetDatum(is_final);
+ HeapTuple outtup = heap_form_tuple(st->outdesc, outvals, outnulls);
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(outtup));
+}
+
+static void payload_blob_checked_pg_add(int64 *acc, int64 value) {
+ if (value < 0 || *acc > PG_INT64_MAX - value) {
+ ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg(CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_payload_blob_checked: payload estimate is too large")));
+ }
+ *acc += value;
+}
+
+static int64 payload_blob_checked_pg_estimate(cloudsync_context *data, int64 since, int64 since_seq, // sums the payload header size plus pk_encode_size() of every qualifying change row
+ bytea *site_id, bool exclude, int64 *watermark) { // returns 0 when no row qualifies; *watermark (if non-NULL) receives the captured MAX(db_version); `data` is not referenced in this body
+ PayloadChunksState st = {0};
+ int64 estimated = 0;
+ bool has_rows = false;
+
+ int64 until = 0;
+ Oid mt[1] = {BYTEAOID};
+ Datum mv[1] = {PointerGetDatum(site_id)};
+ char mn[1] = {' '}; // ' ' = parameter is not NULL (SPI nulls-array convention)
+ const char *mxq = exclude
+ ? "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,NULL) WHERE site_id <> $1"
+ : "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes_select(0,$1)";
+ int mrc = SPI_execute_with_args(mxq, 1, mt, mv, mn, true, 1); // read-only, at most one row
+ if (mrc == SPI_OK_SELECT && SPI_processed > 0) {
+ bool isnull = false;
+ Datum d = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+ until = isnull ? 0 : DatumGetInt64(d);
+ } else {
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("cloudsync_payload_blob_checked: failed to capture payload watermark")));
+ }
+ if (SPI_tuptable) { SPI_freetuptable(SPI_tuptable); SPI_tuptable = NULL; }
+ if (watermark) *watermark = until; // reported even when the row scan below is skipped
+
+ if (until >= since) { // otherwise no row can satisfy both db_version <= until and the since cursor
+ StringInfoData q;
+ initStringInfo(&q);
+ if (exclude) {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,NULL) "
+ "WHERE site_id <> $2 AND db_version <= $3 AND (db_version > $1 OR (db_version = $1 AND seq > $4)) "
+ "ORDER BY db_version, seq ASC");
+ } else {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,$2) "
+ "WHERE db_version <= $3 AND (db_version > $1 OR (db_version = $1 AND seq > $4)) "
+ "ORDER BY db_version, seq ASC");
+ }
+ Oid argtypes[4] = {INT8OID, BYTEAOID, INT8OID, INT8OID}; // $1=since, $2=site_id, $3=until, $4=since_seq (exclusive seq cursor)
+ Datum values[4] = {Int64GetDatum(since), PointerGetDatum(site_id), Int64GetDatum(until), Int64GetDatum(since_seq)};
+ char nulls[4] = {' ', ' ', ' ', ' '};
+ st.portal = SPI_cursor_open_with_args(NULL, q.data, 4, argtypes, values, nulls, true, 0);
+ pfree(q.data);
+ if (!st.portal) ereport(ERROR, (errmsg("SPI_cursor_open failed")));
+
+ size_t header_size = 0;
+ cloudsync_payload_context_size(&header_size); // header size is delivered through the out-parameter
+ if (header_size > (size_t)PG_INT64_MAX) {
+ ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg(CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_payload_blob_checked: payload estimate is too large")));
+ }
+
+ while (payload_chunks_fetch_current(&st)) {
+ pgvalue_t *vals[9] = {0};
+ text *owned_texts[2] = {0};
+ payload_chunks_make_pgvalues(&st, vals, owned_texts);
+ size_t row_size = pk_encode_size((dbvalue_t **)vals, 9, 0, 3);
+ payload_chunks_free_pgvalues(vals, owned_texts); // values are only needed for sizing, release them right away
+ if (row_size == SIZE_MAX || row_size > (size_t)PG_INT64_MAX) { // SIZE_MAX is presumably pk_encode_size's failure value — TODO confirm
+ ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg(CLOUDSYNC_ERRCODE_ROW_TOO_LARGE "cloudsync_payload_blob_checked: payload row is too large")));
+ }
+ if (!has_rows) { // the header is counted once, and only if at least one row exists
+ payload_blob_checked_pg_add(&estimated, (int64)header_size);
+ has_rows = true;
+ }
+ payload_blob_checked_pg_add(&estimated, (int64)row_size);
+ payload_chunks_free_current(&st);
+ }
+ }
+
+ if (st.portal) SPI_cursor_close(st.portal);
+ payload_chunks_free_current(&st);
+ return estimated;
+}
+
+PG_FUNCTION_INFO_V1(cloudsync_payload_blob_checked);
+Datum cloudsync_payload_blob_checked(PG_FUNCTION_ARGS) { // builds one monolithic payload after a size estimate; returns bytea, or SQL NULL when there is nothing to send
+ cloudsync_context *data = get_cloudsync_context();
+ volatile bool spi_connected = false; // volatile: modified inside PG_TRY and read in PG_CATCH (required by PostgreSQL's PG_TRY rules)
+ volatile Portal portal = NULL; // volatile for the same reason; Portal is a pointer typedef, so the pointer itself is qualified
+ PayloadChunksState encode_st = {0};
+ cloudsync_payload_context * volatile payload = NULL; // volatile pointer: allocated inside PG_TRY, released in PG_CATCH
+ bytea *result = NULL; // only read after PG_END_TRY on the non-error path
+ bytea *site_id = PG_ARGISNULL(2) ? NULL : PG_GETARG_BYTEA_PP(2);
+ bool exclude = PG_ARGISNULL(3) ? false : PG_GETARG_BOOL(3);
+
+ if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(4)) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cloudsync_payload_blob_checked: since_db_version, since_seq, and max_estimated_payload_size are required")));
+ }
+ int64 since = PG_GETARG_INT64(0);
+ int64 since_seq = PG_GETARG_INT64(1);
+ int64 max_estimated_size = PG_GETARG_INT64(4);
+ if (max_estimated_size <= 0) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cloudsync_payload_blob_checked: max_estimated_payload_size must be positive")));
+ }
+ if (exclude && !site_id) {
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cloudsync_payload_blob_checked: exclude_filter_site_id requires a non-NULL filter_site_id")));
+ }
+ if (!exclude && !site_id) { // no filter given: default to the local site id
+ site_id = (bytea *)palloc(VARHDRSZ + UUID_LEN);
+ SET_VARSIZE(site_id, VARHDRSZ + UUID_LEN);
+ memcpy(VARDATA(site_id), cloudsync_siteid(data), UUID_LEN);
+ }
+
+ if (SPI_connect() != SPI_OK_CONNECT) ereport(ERROR, (errmsg("SPI_connect failed")));
+ spi_connected = true;
+
+ int64 watermark = 0;
+ int64 estimated = payload_blob_checked_pg_estimate(data, since, since_seq, site_id, exclude, &watermark);
+ if (estimated == 0) { // estimate of 0 means no qualifying rows
+ if (spi_connected) SPI_finish();
+ PG_RETURN_NULL();
+ }
+ if (estimated > max_estimated_size) {
+ if (spi_connected) SPI_finish();
+ ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg(CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_payload_blob_checked: estimated payload size %lld exceeds max_estimated_payload_size %lld",
+ (long long)estimated, (long long)max_estimated_size)));
+ }
+
+ PG_TRY();
+ {
+ StringInfoData q;
+ initStringInfo(&q);
+ if (exclude) {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,NULL) "
+ "WHERE site_id <> $2 AND db_version <= $3 AND (db_version > $1 OR (db_version = $1 AND seq > $4)) "
+ "ORDER BY db_version, seq ASC");
+ } else {
+ appendStringInfoString(&q,
+ "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+ "FROM cloudsync_changes_select(0,$2) "
+ "WHERE db_version <= $3 AND (db_version > $1 OR (db_version = $1 AND seq > $4)) "
+ "ORDER BY db_version, seq ASC");
+ }
+ Oid argtypes[4] = {INT8OID, BYTEAOID, INT8OID, INT8OID}; // $1=since, $2=site_id, $3=watermark captured by the estimate pass, $4=since_seq
+ Datum values[4] = {Int64GetDatum(since), PointerGetDatum(site_id), Int64GetDatum(watermark), Int64GetDatum(since_seq)};
+ char nulls[4] = {' ', ' ', ' ', ' '};
+ portal = SPI_cursor_open_with_args(NULL, q.data, 4, argtypes, values, nulls, true, 0);
+ pfree(q.data);
+ if (!portal) ereport(ERROR, (errmsg("SPI_cursor_open failed")));
+ encode_st.portal = portal;
+
+ payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+ if (!payload) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+
+ while (payload_chunks_fetch_current(&encode_st)) {
+ pgvalue_t *vals[9] = {0};
+ text *owned_texts[2] = {0};
+ payload_chunks_make_pgvalues(&encode_st, vals, owned_texts);
+ int rc = cloudsync_payload_encode_step(payload, data, 9, (dbvalue_t **)vals);
+ payload_chunks_free_pgvalues(vals, owned_texts);
+ if (rc != DBRES_OK) {
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+ }
+ payload_chunks_free_current(&encode_st);
+ }
+ SPI_cursor_close(portal);
+ portal = NULL; // cleared so PG_CATCH does not close it a second time
+ encode_st.portal = NULL;
+
+ int rc = cloudsync_payload_encode_final(payload, data);
+ if (rc != DBRES_OK) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
+ int64 blob_size = 0;
+ char *blob = cloudsync_payload_blob(payload, &blob_size, NULL);
+ if (spi_connected) {
+ SPI_finish();
+ spi_connected = false;
+ }
+ // NOTE: PG_RETURN_* expands to `return`, so returning from inside the
+ // PG_TRY block would skip PG_END_TRY() and leave PG_exception_stack
+ // pointing at this (now-dead) frame; a later ereport(ERROR) in the same
+ // query then siglongjmp()s into freed stack and segfaults. Compute the
+ // result here, return it after PG_END_TRY(). result == NULL means the
+ // empty-blob path (return SQL NULL).
+ if (blob) {
+ result = (bytea *)palloc(VARHDRSZ + blob_size);
+ SET_VARSIZE(result, VARHDRSZ + blob_size);
+ memcpy(VARDATA(result), blob, blob_size);
+ }
+ cloudsync_payload_context_free(payload);
+ payload = NULL; // cleared so PG_CATCH does not free it a second time
+ }
+ PG_CATCH();
+ {
+ if (portal) SPI_cursor_close(portal);
+ payload_chunks_free_current(&encode_st);
+ if (payload) cloudsync_payload_context_free(payload);
+ if (spi_connected) SPI_finish();
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ // Return outside the PG_TRY so PG_END_TRY() always restores PG_exception_stack.
+ if (!result) PG_RETURN_NULL();
+ PG_RETURN_BYTEA_P(result);
+}
+
// Payload decode - Apply changes from payload
PG_FUNCTION_INFO_V1(cloudsync_payload_decode);
Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) {
@@ -1009,7 +1702,7 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) {
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("payload cannot be NULL")));
}
- bytea *payload_data = PG_GETARG_BYTEA_P(0);
+ bytea *payload_data = PG_GETARG_BYTEA_P_COPY(0);
int blen = VARSIZE(payload_data) - VARHDRSZ;
// Sanity check payload size
@@ -1033,10 +1726,13 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) {
PG_TRY();
{
- rc = cloudsync_payload_apply(data, payload, blen, &nrows);
+ // PostgreSQL applies a complete monolithic payload: legacy last-applied
+ // checkpoint (ends on a db_version boundary, so it is safe).
+ rc = cloudsync_payload_apply(data, payload, blen, &nrows, CLOUDSYNC_CHECKPOINT_LAST_APPLIED, 0);
}
PG_CATCH();
{
+ if (payload_data) pfree(payload_data);
if (spi_connected) SPI_finish();
PG_RE_THROW();
}
@@ -1044,8 +1740,10 @@ Datum cloudsync_payload_decode (PG_FUNCTION_ARGS) {
if (spi_connected) SPI_finish();
if (rc != DBRES_OK) {
+ if (payload_data) pfree(payload_data);
ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("%s", cloudsync_errmsg(data))));
}
+ if (payload_data) pfree(payload_data);
PG_RETURN_INT32(nrows);
}
@@ -2668,10 +3366,17 @@ static char * build_union_sql (void) {
PG_FUNCTION_INFO_V1(cloudsync_changes_select);
Datum cloudsync_changes_select(PG_FUNCTION_ARGS) {
- FuncCallContext *funcctx;
+ FuncCallContext *funcctx = NULL;
SRFState *st_local = NULL;
bool spi_connected_local = false;
-
+ bool srf_done = false;
+ Datum srf_result = (Datum) 0;
+
+ // NOTE: the SRF_RETURN_* macros expand to `return`, so they must run AFTER
+ // PG_END_TRY(). Returning from inside the PG_TRY block skips PG_END_TRY and
+ // leaves PG_exception_stack pointing at this (now-dead) frame; a later
+ // ereport(ERROR) in the same query then siglongjmp()s into freed stack and
+ // segfaults. Compute the result inside the guarded block, return outside it.
PG_TRY();
{
if (SRF_IS_FIRSTCALL()) {
@@ -2752,26 +3457,26 @@ Datum cloudsync_changes_select(PG_FUNCTION_ARGS) {
// Must switch to a safe context before SRF_RETURN_DONE deletes it
MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
- SRF_RETURN_DONE(funcctx);
- }
-
- HeapTuple tup = SPI_tuptable->vals[0];
- TupleDesc td = SPI_tuptable->tupdesc;
-
- Datum outvals[9];
- bool outnulls[9];
- for (int i = 0; i < 9; i++) {
- outvals[i] = SPI_getbinval(tup, td, i+1, &outnulls[i]);
- if (!outnulls[i]) {
- Form_pg_attribute att = TupleDescAttr(td, i);
- outvals[i] = datumCopy(outvals[i], att->attbyval, att->attlen);
+ srf_done = true;
+ } else {
+ HeapTuple tup = SPI_tuptable->vals[0];
+ TupleDesc td = SPI_tuptable->tupdesc;
+
+ Datum outvals[9];
+ bool outnulls[9];
+ for (int i = 0; i < 9; i++) {
+ outvals[i] = SPI_getbinval(tup, td, i+1, &outnulls[i]);
+ if (!outnulls[i]) {
+ Form_pg_attribute att = TupleDescAttr(td, i);
+ outvals[i] = datumCopy(outvals[i], att->attbyval, att->attlen);
+ }
}
+
+ HeapTuple outtup = heap_form_tuple(st->outdesc, outvals, outnulls);
+ SPI_freetuptable(SPI_tuptable);
+ SPI_tuptable = NULL;
+ srf_result = HeapTupleGetDatum(outtup);
}
-
- HeapTuple outtup = heap_form_tuple(st->outdesc, outvals, outnulls);
- SPI_freetuptable(SPI_tuptable);
- SPI_tuptable = NULL;
- SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(outtup));
}
PG_CATCH();
{
@@ -2796,6 +3501,10 @@ Datum cloudsync_changes_select(PG_FUNCTION_ARGS) {
PG_RE_THROW();
}
PG_END_TRY();
+
+ // Return outside the PG_TRY so PG_END_TRY() always restores PG_exception_stack.
+ if (srf_done) SRF_RETURN_DONE(funcctx);
+ SRF_RETURN_NEXT(funcctx, srf_result);
}
// Trigger INSERT
diff --git a/src/postgresql/migrations/cloudsync--1.0--1.1.sql b/src/postgresql/migrations/cloudsync--1.0--1.1.sql
new file mode 100644
index 00000000..1acb4810
--- /dev/null
+++ b/src/postgresql/migrations/cloudsync--1.0--1.1.sql
@@ -0,0 +1,81 @@
+-- CloudSync PostgreSQL extension upgrade: 1.0 -> 1.1
+--
+-- Adds the chunked-payload SQL surface introduced in 1.1:
+-- * cloudsync_payload_chunks() set-returning function (with the
+-- exclude_filter_site_id flag used by the /check download path)
+-- * cloudsync_payload_blob_checked() scalar helper for guarded legacy
+-- monolithic payload materialization
+-- * cloudsync_uuid_text() / cloudsync_uuid_blob() UUID conversion helpers
+-- * cloudsync_payload_fragments spool table for oversized v3 fragment
+-- values
+--
+-- Run automatically by: ALTER EXTENSION cloudsync UPDATE;
+
+-- Spool table for oversized v3 fragment values. Created here (as the
+-- extension owner) because on an already-configured database the first role
+-- to touch it may be a sync-only role without CREATE privileges; fresh
+-- setups get it from dbutils_settings_init like the other cloudsync tables.
+-- Detached from the extension right after creation so DROP EXTENSION and
+-- pg_dump treat it exactly like those runtime-created tables. The DO block
+-- skips both steps when the table already exists (detach would fail on a
+-- non-member table).
+DO $$
+BEGIN
+ IF to_regclass('cloudsync_payload_fragments') IS NULL THEN
+ CREATE TABLE cloudsync_payload_fragments (
+ value_id TEXT NOT NULL, part_index BIGINT NOT NULL, part_count BIGINT NOT NULL, total_size BIGINT NOT NULL,
+ checksum TEXT NOT NULL, created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint),
+ tbl TEXT NOT NULL, pk BYTEA NOT NULL, col_name TEXT NOT NULL, col_version BIGINT NOT NULL, db_version BIGINT NOT NULL,
+ site_id BYTEA NOT NULL, cl BIGINT NOT NULL, seq BIGINT NOT NULL, fragment BYTEA NOT NULL,
+ PRIMARY KEY(value_id, part_index)
+ );
+ ALTER EXTENSION cloudsync DROP TABLE cloudsync_payload_fragments;
+ END IF;
+END $$; -- create-and-detach runs only when to_regclass() finds no existing cloudsync_payload_fragments relation
+
+CREATE OR REPLACE FUNCTION cloudsync_payload_chunks(
+ since_db_version bigint DEFAULT NULL, -- passed as the first argument of cloudsync_changes_select in the non-positional query
+ filter_site_id bytea DEFAULT NULL, -- NULL with exclude_filter_site_id false: the C code substitutes the local site id
+ until_db_version bigint DEFAULT NULL, -- NULL or 0: the C code uses MAX(db_version) of the selected changes as the watermark
+ exclude_filter_site_id boolean DEFAULT false, -- true: select changes whose site_id <> filter_site_id; requires a non-NULL filter_site_id
+ resume_db_version bigint DEFAULT NULL, -- positional resume: inclusive lower bound (db_version, seq) >= (resume_db_version, resume_seq)
+ resume_seq bigint DEFAULT NULL, -- see resume_db_version
+ resume_frag_offset bigint DEFAULT NULL -- > 0 in positional mode: resume inside a value that was fragmented across chunks
+)
+RETURNS TABLE (
+ payload bytea, -- one encoded chunk
+ chunk_index bigint, -- increases by one per returned row
+ payload_size bigint, -- byte length of payload
+ rows bigint, -- as reported by the chunk builder for this chunk
+ db_version_min bigint, -- as reported by the chunk builder for this chunk
+ db_version_max bigint, -- as reported by the chunk builder for this chunk
+ watermark_db_version bigint, -- upper db_version bound used for the whole scan
+ next_db_version bigint, -- resume point after this chunk; equals the watermark on the final chunk
+ next_seq bigint, -- resume point after this chunk; 0 on the final chunk
+ next_frag_offset bigint, -- non-zero only while a fragmented value is still being emitted
+ is_final boolean -- true when no further row follows this chunk
+)
+AS 'MODULE_PATHNAME', 'cloudsync_payload_chunks'
+LANGUAGE C VOLATILE;
+
+CREATE OR REPLACE FUNCTION cloudsync_payload_blob_checked(
+ since_db_version bigint, -- required (NULL raises an error); rows with db_version > this, or equal with seq > since_seq, are included
+ since_seq bigint, -- required; exclusive seq cursor within since_db_version
+ filter_site_id bytea, -- NULL with exclude_filter_site_id false: the C code substitutes the local site id
+ exclude_filter_site_id boolean, -- NULL is treated as false; true requires a non-NULL filter_site_id
+ max_estimated_payload_size bigint -- required and must be positive; a larger estimate raises an error
+)
+RETURNS bytea -- SQL NULL when no rows qualify
+AS 'MODULE_PATHNAME', 'cloudsync_payload_blob_checked'
+LANGUAGE C VOLATILE;
+
+CREATE OR REPLACE FUNCTION cloudsync_uuid_text(uuid bytea, dash_format boolean DEFAULT true) -- UUID bytes -> text; presumably mirrors the SQLite cloudsync_uuid_text — confirm against the C implementation
+RETURNS text
+AS 'MODULE_PATHNAME', 'cloudsync_uuid_text'
+LANGUAGE C IMMUTABLE; -- NOTE(review): not declared STRICT, so the C function is called with NULL arguments — confirm it handles them
+
+CREATE OR REPLACE FUNCTION cloudsync_uuid_blob(uuid text) -- UUID text -> bytes; presumably mirrors the SQLite cloudsync_uuid_blob — confirm against the C implementation
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'cloudsync_uuid_blob'
+LANGUAGE C IMMUTABLE; -- NOTE(review): not declared STRICT, so the C function is called with NULL arguments — confirm it handles them
+
diff --git a/src/postgresql/sql_postgresql.c b/src/postgresql/sql_postgresql.c
index 44ea2c10..9106b1b1 100644
--- a/src/postgresql/sql_postgresql.c
+++ b/src/postgresql/sql_postgresql.c
@@ -84,7 +84,8 @@ const char * const SQL_SETTINGS_CLEANUP_DROP_ALL =
"DROP TABLE IF EXISTS cloudsync_settings CASCADE; "
"DROP TABLE IF EXISTS cloudsync_site_id CASCADE; "
"DROP TABLE IF EXISTS cloudsync_table_settings CASCADE; "
- "DROP TABLE IF EXISTS cloudsync_schema_versions CASCADE;";
+ "DROP TABLE IF EXISTS cloudsync_schema_versions CASCADE; "
+ "DROP TABLE IF EXISTS cloudsync_payload_fragments CASCADE;";
// MARK: CloudSync
@@ -105,6 +106,42 @@ const char * const SQL_CHANGES_INSERT_ROW =
"INSERT INTO cloudsync_changes(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) "
"VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9);";
+const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE = // idempotent DDL (IF NOT EXISTS) for the fragment table, keyed by (value_id, part_index)
+ "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ("
+ "value_id TEXT NOT NULL, part_index BIGINT NOT NULL, part_count BIGINT NOT NULL, total_size BIGINT NOT NULL, "
+ "checksum TEXT NOT NULL, created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint), "
+ "tbl TEXT NOT NULL, pk BYTEA NOT NULL, col_name TEXT NOT NULL, col_version BIGINT NOT NULL, db_version BIGINT NOT NULL, "
+ "site_id BYTEA NOT NULL, cl BIGINT NOT NULL, seq BIGINT NOT NULL, fragment BYTEA NOT NULL, "
+ "PRIMARY KEY(value_id, part_index));";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT = // stores one fragment row ($1..$15 in column-list order); a key conflict overwrites every non-key column
+ "INSERT INTO cloudsync_payload_fragments "
+ "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) "
+ "VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) "
+ "ON CONFLICT (value_id, part_index) DO UPDATE SET "
+ "part_count=EXCLUDED.part_count,total_size=EXCLUDED.total_size,checksum=EXCLUDED.checksum,created_at=EXCLUDED.created_at,"
+ "tbl=EXCLUDED.tbl,pk=EXCLUDED.pk,"
+ "col_name=EXCLUDED.col_name,col_version=EXCLUDED.col_version,db_version=EXCLUDED.db_version,"
+ "site_id=EXCLUDED.site_id,cl=EXCLUDED.cl,seq=EXCLUDED.seq,fragment=EXCLUDED.fragment;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_COUNT = // one aggregate row for value_id=$1; presumably lets the caller check that all parts are present and agree — confirm at the call site
+ "SELECT COUNT(*), MIN(part_count), MAX(part_count), MIN(total_size), MAX(total_size), "
+ "MIN(checksum), MAX(checksum), MIN(part_index), MAX(part_index) "
+ "FROM cloudsync_payload_fragments WHERE value_id=$1;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_SELECT = // every stored part of value_id=$1, in ascending part_index order
+ "SELECT fragment, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, checksum "
+ "FROM cloudsync_payload_fragments WHERE value_id=$1 ORDER BY part_index ASC;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_DELETE = // removes every stored part of value_id=$1
+ "DELETE FROM cloudsync_payload_fragments WHERE value_id=$1;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE = // deletes rows with created_at < $1 whose value_id has fewer stored parts than its part_count
+ "DELETE FROM cloudsync_payload_fragments "
+ "WHERE created_at < $1 AND value_id IN ("
+ "SELECT value_id FROM cloudsync_payload_fragments GROUP BY value_id "
+ "HAVING COUNT(*) < MAX(part_count));";
+
// MARK: Additional SQL constants for PostgreSQL
const char * const SQL_SITEID_SELECT_ROWID0 =
diff --git a/src/sql.h b/src/sql.h
index d9b9f0df..68371218 100644
--- a/src/sql.h
+++ b/src/sql.h
@@ -67,6 +67,12 @@ extern const char * const SQL_CLOUDSYNC_INSERT_MISSING_PKS_FROM_BASE_EXCEPT_SYNC
extern const char * const SQL_CLOUDSYNC_SELECT_PKS_NOT_IN_SYNC_FOR_COL;
extern const char * const SQL_CLOUDSYNC_SELECT_PKS_NOT_IN_SYNC_FOR_COL_FILTERED;
extern const char * const SQL_CHANGES_INSERT_ROW;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_COUNT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_SELECT;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_DELETE;
+extern const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE;
// BLOCKS (block-level LWW)
extern const char * const SQL_BLOCKS_CREATE_TABLE;
diff --git a/src/sqlite/cloudsync_sqlite.c b/src/sqlite/cloudsync_sqlite.c
index bdff56b5..80d45cc0 100644
--- a/src/sqlite/cloudsync_sqlite.c
+++ b/src/sqlite/cloudsync_sqlite.c
@@ -12,6 +12,9 @@
#include "../block.h"
#include "../database.h"
#include "../dbutils.h"
+#include "../sql.h"
+#include
+#include
#ifndef CLOUDSYNC_OMIT_NETWORK
#include "../network/network.h"
@@ -135,12 +138,42 @@ void dbsync_seq (sqlite3_context *context, int argc, sqlite3_value **argv) {
void dbsync_uuid (sqlite3_context *context, int argc, sqlite3_value **argv) {
DEBUG_FUNCTION("cloudsync_uuid");
-
+
char value[UUID_STR_MAXLEN];
char *uuid = cloudsync_uuid_v7_string(value, true);
sqlite3_result_text(context, uuid, -1, SQLITE_TRANSIENT);
}
+// cloudsync_uuid_text(blob, [dash_format]) -> canonical UUID string
+void dbsync_uuid_text (sqlite3_context *context, int argc, sqlite3_value **argv) {
+ DEBUG_FUNCTION("cloudsync_uuid_text");
+
+ if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { sqlite3_result_null(context); return; }
+ if (sqlite3_value_type(argv[0]) != SQLITE_BLOB || sqlite3_value_bytes(argv[0]) != UUID_LEN) {
+ sqlite3_result_error(context, "cloudsync_uuid_text: expected a 16-byte BLOB.", -1);
+ return;
+ }
+ bool dash_format = (argc > 1) ? (sqlite3_value_int(argv[1]) != 0) : true;
+ char value[UUID_STR_MAXLEN];
+ cloudsync_uuid_v7_stringify((uint8_t *)sqlite3_value_blob(argv[0]), value, dash_format);
+ sqlite3_result_text(context, value, -1, SQLITE_TRANSIENT);
+}
+
+// SQL: cloudsync_uuid_blob(text) -> UUID_LEN-byte blob (accepts dashed/undashed).
+// NULL in gives NULL out; text that does not parse is reported as an error.
+void dbsync_uuid_blob (sqlite3_context *context, int argc, sqlite3_value **argv) {
+ DEBUG_FUNCTION("cloudsync_uuid_blob");
+
+ sqlite3_value *input = argv[0];
+ if (sqlite3_value_type(input) == SQLITE_NULL) { sqlite3_result_null(context); return; }
+ const char *text = (const char *)sqlite3_value_text(input); // text is fetched first, then its byte length
+ int text_len = sqlite3_value_bytes(input);
+ uint8_t parsed[UUID_LEN];
+ bool ok = (text != NULL) && (cloudsync_uuid_v7_parse(text, text_len, parsed) == 0); // the parser is skipped when no text is available
+ if (!ok) { sqlite3_result_error(context, "cloudsync_uuid_blob: malformed UUID string.", -1); return; }
+ sqlite3_result_blob(context, parsed, UUID_LEN, SQLITE_TRANSIENT);
+}
+
// MARK: -
void dbsync_set (sqlite3_context *context, int argc, sqlite3_value **argv) {
@@ -1061,11 +1094,16 @@ void dbsync_payload_decode (sqlite3_context *context, int argc, sqlite3_value **
// obtain payload
const char *payload = (const char *)database_value_blob(argv[0]);
-
+
// apply changes
+ // The public SQL function applies a single complete payload: advance the
+ // receive cursor to its last applied (db_version, seq) (legacy behavior, safe
+ // for a payload that ends on a db_version boundary). The chunked-download
+ // receive path gates cursor advancement on stream completion via the C-level
+ // checkpoint argument instead (see cloudsync_payload_apply in cloudsync.h).
int nrows = 0;
cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
- int rc = cloudsync_payload_apply(data, payload, blen, &nrows);
+ int rc = cloudsync_payload_apply(data, payload, blen, &nrows, CLOUDSYNC_CHECKPOINT_LAST_APPLIED, 0);
if (rc != SQLITE_OK) {
sqlite3_result_error(context, cloudsync_errmsg(data), -1);
sqlite3_result_error_code(context, rc);
@@ -1076,6 +1114,673 @@ void dbsync_payload_decode (sqlite3_context *context, int argc, sqlite3_value **
sqlite3_result_int(context, nrows);
}
+typedef struct { // per-connection object of the cloudsync_payload_chunks virtual table
+ sqlite3_vtab base; // first member: xConnect hands this struct to SQLite as a sqlite3_vtab *
+ sqlite3 *db; // connection received by xConnect; the source queries are prepared on it
+ cloudsync_context *data; // aux pointer received by xConnect, stored as the cloudsync context
+} cloudsync_payload_chunks_vtab;
+
+typedef struct { // scan state of one cloudsync_payload_chunks cursor; field ORDER matters (see eof)
+ sqlite3_vtab_cursor base; // first member: xOpen hands this struct to SQLite as a sqlite3_vtab_cursor *
+ cloudsync_payload_chunks_vtab *vtab; // owning table, set in xOpen
+ sqlite3_stmt *src; // SELECT over cloudsync_changes feeding the chunks; finalized in xFilter/xClose
+ bool eof; // set when no chunk could be built; xFilter zeroes the struct from this field to its end
+ bool has_row; // true while src is positioned on a row not yet consumed into a chunk
+ int chunk_index; // chunks built so far in this scan (the chunk_index column reports this minus 1)
+ char *payload; // blob of the current chunk, owned by the cursor and freed with cloudsync_memory_free
+ int64_t payload_size; // size of payload as reported by cloudsync_payload_blob
+ int64_t rows; // row count of payload as reported by cloudsync_payload_blob
+ int64_t dbv_min; // db_version of the first row encoded in the current chunk
+ int64_t dbv_max; // db_version of the last row encoded in the current chunk
+ int64_t watermark; // upper db_version bound of the scan window (`until` in xFilter)
+ bool frag_active; // true while a single oversized value is being emitted fragment by fragment
+ int frag_part; // index of the next fragment to emit
+ int frag_count; // planned number of fragments for the value
+ int frag_target; // planned number of value bytes per fragment
+ int64_t frag_offset; // byte offset of the next fragment within header + data
+ int64_t frag_total; // value_header_len + value_data_len
+ uint64_t frag_checksum; // result of cloudsync_payload_encoded_value_checksum for the value
+ char value_header[16]; // encoded header bytes written by cloudsync_payload_encoded_value_header
+ int value_header_len; // number of valid bytes in value_header
+ const char *value_data; // raw bytes of the value, borrowed from the current src row
+ int64_t value_data_len; // raw length reported alongside the header
+ // Positional-cursor outputs: the resume point AFTER the chunk currently held.
+ // These live in the per-scan reset region (after eof) so xFilter's bulk memset
+ // clears them. next_* is the (db_version, seq, frag_offset) a follow-up call
+ // passes back as resume_* to continue exactly where this chunk stopped.
+ int64_t next_dbv;
+ int64_t next_seq;
+ int64_t next_frag_offset;
+ bool is_final; // true when the source was exhausted after building the current chunk
+} cloudsync_payload_chunks_cursor;
+
+// xConnect: declares the table schema to SQLite and allocates the per-connection
+// vtab object. `aux` is stored as the cloudsync context. Returns SQLITE_OK, the
+// sqlite3_declare_vtab error code, or SQLITE_NOMEM.
+static int payload_chunks_connect(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtab, char **err) {
+    UNUSED_PARAMETER(argc); UNUSED_PARAMETER(argv); UNUSED_PARAMETER(err);
+
+    // Column indices are relied upon by xBestIndex and xColumn:
+    //   0..6   chunk outputs
+    //   7..10  hidden inputs (since, site_id, until, exclude flag)
+    //   11..14 positional-cursor outputs: the resume point after the emitted
+    //          chunk, plus a final-chunk flag
+    //   15..17 hidden resume_* inputs: a stateless /check passes 11..13 back
+    //          here to continue the drain without a spool table
+    static const char schema[] =
+        "CREATE TABLE x(payload BLOB, chunk_index INTEGER, payload_size INTEGER, rows INTEGER, "
+        "db_version_min INTEGER, db_version_max INTEGER, watermark_db_version INTEGER, "
+        "since_db_version HIDDEN, site_id HIDDEN, until_db_version HIDDEN, exclude_filter_site_id HIDDEN, "
+        "next_db_version INTEGER, next_seq INTEGER, next_frag_offset INTEGER, is_final INTEGER, "
+        "resume_db_version HIDDEN, resume_seq HIDDEN, resume_frag_offset HIDDEN)";
+
+    int declare_rc = sqlite3_declare_vtab(db, schema);
+    if (declare_rc != SQLITE_OK) return declare_rc;
+
+    cloudsync_payload_chunks_vtab *table = sqlite3_malloc64(sizeof(*table));
+    if (table == NULL) return SQLITE_NOMEM;
+
+    memset(table, 0, sizeof(*table));
+    table->db = db;
+    table->data = (cloudsync_context *)aux;
+
+    *vtab = &table->base;
+    return SQLITE_OK;
+}
+
+// xDisconnect: releases the vtab object that xConnect obtained from sqlite3_malloc64.
+static int payload_chunks_disconnect(sqlite3_vtab *vtab) {
+    cloudsync_payload_chunks_vtab *table = (cloudsync_payload_chunks_vtab *)vtab;
+    sqlite3_free(table);
+    return SQLITE_OK;
+}
+
+// xOpen: allocates a zero-filled cursor bound to `vtab`. No query is prepared
+// here; that happens in xFilter. Returns SQLITE_NOMEM when allocation fails.
+static int payload_chunks_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursor) {
+    cloudsync_payload_chunks_cursor *cur = cloudsync_memory_zeroalloc(sizeof(*cur));
+    if (cur == NULL) return SQLITE_NOMEM;
+
+    cur->vtab = (cloudsync_payload_chunks_vtab *)vtab;
+    *cursor = &cur->base;
+    return SQLITE_OK;
+}
+
+// xClose: finalizes the source statement, frees the chunk currently held and
+// then the cursor itself.
+static int payload_chunks_close(sqlite3_vtab_cursor *cursor) {
+    cloudsync_payload_chunks_cursor *cur = (cloudsync_payload_chunks_cursor *)cursor;
+
+    if (cur->src != NULL) {
+        sqlite3_finalize(cur->src);
+    }
+    if (cur->payload != NULL) {
+        cloudsync_memory_free(cur->payload);
+    }
+    cloudsync_memory_free(cur);
+    return SQLITE_OK;
+}
+
+// Returns the index of the first usable EQ constraint on column `col`, or -1
+// when the query has none.
+static int payload_chunks_find_eq_constraint(const sqlite3_index_info *info, int col) {
+    for (int i = 0; i < info->nConstraint; ++i) {
+        const struct sqlite3_index_constraint *cn = &info->aConstraint[i];
+        if (cn->usable && cn->op == SQLITE_INDEX_CONSTRAINT_EQ && cn->iColumn == col) return i;
+    }
+    return -1;
+}
+
+// xBestIndex: hands the bound hidden-column inputs to xFilter in a fixed order.
+// argvIndex values are assigned in the order of input_cols, and bit k of idxNum
+// is set when input_cols[k] is bound, so xFilter can walk argv with the same
+// table regardless of how SQLite ordered the constraints:
+//   bit0=since_db_version(7)       bit1=site_id(8)        bit2=until_db_version(9)
+//   bit3=exclude_filter_site_id(10) bit4=resume_db_version(15)
+//   bit5=resume_seq(16)            bit6=resume_frag_offset(17)
+// At most one constraint is consumed per hidden column.
+static int payload_chunks_best_index(sqlite3_vtab *vtab, sqlite3_index_info *idxinfo) {
+    UNUSED_PARAMETER(vtab);
+
+    static const int input_cols[] = {7, 8, 9, 10, 15, 16, 17};
+    const int n_inputs = (int)(sizeof(input_cols) / sizeof(input_cols[0]));
+
+    int bound_mask = 0;
+    int next_argv = 1;
+    for (int bit = 0; bit < n_inputs; ++bit) {
+        int hit = payload_chunks_find_eq_constraint(idxinfo, input_cols[bit]);
+        if (hit < 0) continue;
+
+        idxinfo->aConstraintUsage[hit].argvIndex = next_argv;
+        idxinfo->aConstraintUsage[hit].omit = 1;
+        next_argv += 1;
+        bound_mask |= (1 << bit);
+    }
+
+    idxinfo->idxNum = bound_mask;
+    idxinfo->estimatedCost = 10.0;
+    idxinfo->estimatedRows = 10;
+    return SQLITE_OK;
+}
+
+// Advances the source statement by one row and records in has_row whether a
+// row is available. Both SQLITE_ROW and SQLITE_DONE map to SQLITE_OK; any other
+// step result is returned unchanged.
+static int payload_chunks_step_source(cloudsync_payload_chunks_cursor *c) {
+    int step_rc = sqlite3_step(c->src);
+
+    c->has_row = (step_rc == SQLITE_ROW);
+    if (step_rc == SQLITE_ROW || step_rc == SQLITE_DONE) return SQLITE_OK;
+    return step_rc;
+}
+
+// Queries cloudsync_payload_fragment_data_size for the current source row as
+// fragment `part` of `count`. The result is used by the caller as the number of
+// value bytes per fragment; values <= 0 are treated as failure.
+static int payload_chunks_fragment_capacity(cloudsync_payload_chunks_cursor *c, int part, int count) {
+    sqlite3_stmt *row = c->src;
+    return cloudsync_payload_fragment_data_size(c->vtab->data,
+        (const char *)sqlite3_column_text(row, 0), sqlite3_column_bytes(row, 0),
+        sqlite3_column_blob(row, 1), sqlite3_column_bytes(row, 1),
+        (const char *)sqlite3_column_text(row, 2), sqlite3_column_bytes(row, 2),
+        sqlite3_column_int64(row, 4), sqlite3_column_int64(row, 5),
+        sqlite3_column_blob(row, 6), sqlite3_column_bytes(row, 6),
+        sqlite3_column_int64(row, 7), sqlite3_column_int64(row, 8),
+        c->frag_total, part, count);
+}
+
+// Chooses frag_target (bytes per fragment) and frag_count for the value whose
+// total encoded size is already stored in c->frag_total. Starts from the size
+// reported for a single fragment, then re-queries the size for the last part of
+// the resulting count until the answer stops changing or the iteration limit is
+// reached. Returns SQLITE_TOOBIG whenever a size or count comes back <= 0.
+static int payload_chunks_plan_fragment(cloudsync_payload_chunks_cursor *c) {
+    int size = payload_chunks_fragment_capacity(c, 0, 1);
+    if (size <= 0) return SQLITE_TOOBIG;
+
+    for (int round = 0; round < CLOUDSYNC_PAYLOAD_FRAGMENT_SIZE_FIXPOINT_ITERATIONS; ++round) {
+        int parts = cloudsync_payload_fragment_count(c->frag_total, size);
+        if (parts <= 0) return SQLITE_TOOBIG;
+
+        int refined = payload_chunks_fragment_capacity(c, parts - 1, parts);
+        if (refined <= 0) return SQLITE_TOOBIG;
+        if (refined == size) break;   // fixpoint reached
+        size = refined;
+    }
+
+    c->frag_target = size;
+    c->frag_count = cloudsync_payload_fragment_count(c->frag_total, size);
+    return (c->frag_count > 0) ? SQLITE_OK : SQLITE_TOOBIG;
+}
+
+// Prepares the cursor to stream the col_value of the current source row (a
+// single value larger than max_chunk_size) through emit_fragment.
+//
+// start_offset is the byte offset inside the encoded value (header + data) at
+// which emission starts: 0 when the value is first reached, >0 when a
+// positional cursor resumes in the middle of it. frag_part is derived from that
+// offset, so the part index is the same whether the fragment was reached by
+// streaming or by a seek. The plan (frag_target/frag_count) depends only on the
+// row, so a resumed value tiles exactly like a streamed one.
+//
+// Returns SQLITE_TOOBIG when the value is neither TEXT nor BLOB, SQLITE_ERROR
+// when its header cannot be encoded, or the error from plan_fragment.
+static int payload_chunks_begin_fragment(cloudsync_payload_chunks_cursor *c, int64_t start_offset) {
+    dbvalue_t *value = (dbvalue_t *)sqlite3_column_value(c->src, 3);
+
+    int vtype = database_value_type(value);
+    bool fragmentable = (vtype == DBTYPE_TEXT) || (vtype == DBTYPE_BLOB);
+    if (!fragmentable) return SQLITE_TOOBIG;
+
+    int64_t data_len = 0;
+    int hdr_len = cloudsync_payload_encoded_value_header(value, c->value_header, sizeof(c->value_header), &data_len);
+    if (hdr_len <= 0) return SQLITE_ERROR;
+
+    // Describe the byte stream to fragment: header bytes followed by raw data.
+    c->value_header_len = hdr_len;
+    c->value_data = (const char *)database_value_blob(value);
+    c->value_data_len = data_len;
+    c->frag_total = hdr_len + data_len;
+    c->frag_offset = start_offset;
+
+    int plan_rc = payload_chunks_plan_fragment(c);
+    if (plan_rc != SQLITE_OK) return plan_rc;
+
+    int part = 0;
+    if (c->frag_target > 0) part = (int)(start_offset / c->frag_target);
+    c->frag_part = part;
+    c->frag_checksum = cloudsync_payload_encoded_value_checksum(value);
+    c->frag_active = true;
+    return SQLITE_OK;
+}
+
+// Emits the next fragment of the value set up by begin_fragment as its own
+// single-row chunk in c->payload.
+//
+// The fragment covers up to frag_target bytes of the header+data stream starting
+// at frag_offset. On success chunk_index, frag_offset and frag_part advance, and
+// once the last planned part has been emitted the source statement is stepped to
+// the next row. Returns SQLITE_CORRUPT when no bytes remain at frag_offset,
+// SQLITE_NOMEM on allocation failure, or the encoder / step error.
+//
+// An encoder context that is discarded on an error path is released with
+// cloudsync_payload_context_free rather than cloudsync_memory_free, so a buffer
+// the encoder may already have allocated is not leaked. This assumes
+// cloudsync_payload_context_free releases the context together with its buffer,
+// which is how dbsync_payload_blob_checked uses it.
+static int payload_chunks_emit_fragment(cloudsync_payload_chunks_cursor *c) {
+    cloudsync_context *data = c->vtab->data;
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+
+    int64_t remaining = c->frag_total - c->frag_offset;
+    int frag_len = remaining > c->frag_target ? c->frag_target : (int)remaining;
+    if (frag_len <= 0) return SQLITE_CORRUPT;
+    char *frag = cloudsync_memory_alloc((uint64_t)frag_len);
+    if (!frag) return SQLITE_NOMEM;
+
+    // Assemble the fragment bytes: first whatever part of the value header lies
+    // at or after frag_offset, then raw value data.
+    int copied = 0;
+    int64_t off = c->frag_offset;
+    if (off < c->value_header_len) {
+        int n = c->value_header_len - (int)off;
+        if (n > frag_len) n = frag_len;
+        memcpy(frag, c->value_header + off, (size_t)n);
+        copied += n;
+        off += n;
+    }
+    if (copied < frag_len) {
+        int64_t data_off = off - c->value_header_len;
+        memcpy(frag + copied, c->value_data + data_off, (size_t)(frag_len - copied));
+    }
+
+    cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+    if (!payload) { cloudsync_memory_free(frag); return SQLITE_NOMEM; }
+    int rc = cloudsync_payload_encode_fragment_step(payload, data,
+        (const char *)sqlite3_column_text(c->src, 0), sqlite3_column_bytes(c->src, 0),
+        sqlite3_column_blob(c->src, 1), sqlite3_column_bytes(c->src, 1),
+        (const char *)sqlite3_column_text(c->src, 2), sqlite3_column_bytes(c->src, 2),
+        frag, frag_len,
+        sqlite3_column_int64(c->src, 4), sqlite3_column_int64(c->src, 5),
+        sqlite3_column_blob(c->src, 6), sqlite3_column_bytes(c->src, 6),
+        sqlite3_column_int64(c->src, 7), sqlite3_column_int64(c->src, 8),
+        c->frag_checksum, c->frag_total, c->frag_part, c->frag_count);
+    cloudsync_memory_free(frag);
+    if (rc != SQLITE_OK) { cloudsync_payload_context_free(payload); return rc; }
+    rc = cloudsync_payload_encode_final(payload, data);
+    if (rc != SQLITE_OK) { cloudsync_payload_context_free(payload); return rc; }
+
+    // The cursor takes over the encoded blob, so only the context struct is freed.
+    c->payload = cloudsync_payload_blob(payload, &c->payload_size, &c->rows);
+    cloudsync_memory_free(payload);
+
+    c->dbv_min = sqlite3_column_int64(c->src, 5);
+    c->dbv_max = c->dbv_min;
+    c->chunk_index++;
+    c->frag_offset += frag_len;
+    c->frag_part++;
+    if (c->frag_part >= c->frag_count) {
+        // Value fully emitted: leave fragment mode and move to the next row.
+        c->frag_active = false;
+        rc = payload_chunks_step_source(c);
+    }
+    return rc;
+}
+
+// Builds the next chunk into c->payload, or sets c->eof when nothing is left.
+//
+// While a fragmented value is in progress this delegates to emit_fragment.
+// Otherwise rows are encoded from the source statement until the next row would
+// push the chunk past cloudsync_payload_max_chunk_size. A row that cannot fit in
+// a chunk on its own (row size + payload header + safety margin > max) closes
+// the chunk being built, or, if the chunk is still empty, switches to fragment
+// mode for that row.
+//
+// Every path that discards the encoder context releases it with
+// cloudsync_payload_context_free rather than cloudsync_memory_free, because rows
+// may already have been encoded into a buffer owned by the context. This assumes
+// cloudsync_payload_context_free releases the context together with its buffer,
+// which is how dbsync_payload_blob_checked uses it. Only the success path, where
+// the blob is handed to the cursor, frees just the context struct.
+static int payload_chunks_build_next(cloudsync_payload_chunks_cursor *c) {
+    cloudsync_context *data = c->vtab->data;
+    int rc = SQLITE_OK;
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+    c->payload_size = c->rows = c->dbv_min = c->dbv_max = 0;
+    if (c->frag_active) return payload_chunks_emit_fragment(c);
+    if (!c->has_row) { c->eof = true; return SQLITE_OK; }
+
+    int max_size = cloudsync_payload_max_chunk_size(data);
+    size_t payload_header_size = 0;
+    cloudsync_payload_context_size(&payload_header_size);
+    cloudsync_payload_context *payload = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+    if (!payload) return SQLITE_NOMEM;
+    while (c->has_row) {
+        sqlite3_value *rowv[9];
+        for (int i = 0; i < 9; ++i) rowv[i] = sqlite3_column_value(c->src, i);
+        size_t row_size = pk_encode_size((dbvalue_t **)rowv, 9, 0, -1);
+        if (row_size == SIZE_MAX) { cloudsync_payload_context_free(payload); return SQLITE_NOMEM; }
+
+        if ((int64_t)row_size + (int64_t)payload_header_size + CLOUDSYNC_PAYLOAD_CHUNK_SAFETY_MARGIN > max_size) {
+            // Oversized row: flush what we have first; it is fragmented on the next call.
+            if (cloudsync_payload_context_nrows(payload) > 0) break;
+            cloudsync_payload_context_free(payload);
+            rc = payload_chunks_begin_fragment(c, 0);
+            if (rc != SQLITE_OK) return rc;
+            return payload_chunks_emit_fragment(c);
+        }
+
+        // Close the chunk before a row that would overflow it (never leave it empty).
+        if (cloudsync_payload_context_nrows(payload) > 0 && cloudsync_payload_context_bused(payload) + row_size > (size_t)max_size) break;
+        rc = cloudsync_payload_encode_step(payload, data, 9, (dbvalue_t **)rowv);
+        if (rc != SQLITE_OK) { cloudsync_payload_context_free(payload); return rc; }
+        int64_t dbv = sqlite3_column_int64(c->src, 5);
+        if (cloudsync_payload_context_nrows(payload) == 1) c->dbv_min = dbv;
+        c->dbv_max = dbv;
+        rc = payload_chunks_step_source(c);
+        if (rc != SQLITE_OK) { cloudsync_payload_context_free(payload); return rc; }
+    }
+
+    if (cloudsync_payload_context_nrows(payload) == 0) { cloudsync_payload_context_free(payload); c->eof = true; return SQLITE_OK; }
+    rc = cloudsync_payload_encode_final(payload, data);
+    if (rc != SQLITE_OK) { cloudsync_payload_context_free(payload); return rc; }
+    c->payload = cloudsync_payload_blob(payload, &c->payload_size, &c->rows);
+    cloudsync_memory_free(payload);
+    c->chunk_index++;
+    return SQLITE_OK;
+}
+
+// Stores in next_dbv/next_seq/next_frag_offset/is_final the position a stateless
+// caller must pass back to continue after the chunk that was just built.
+// The source statement is positioned on the next unconsumed row, or still on
+// the same row while a value is mid-fragment. Call only after build_next has
+// produced a chunk (i.e. !eof).
+static void payload_chunks_set_next_cursor(cloudsync_payload_chunks_cursor *c) {
+    bool on_row = c->frag_active || c->has_row;
+
+    if (!on_row) {
+        // Stream exhausted: this was the last chunk of the window.
+        c->next_dbv = c->watermark;
+        c->next_seq = 0;
+        c->next_frag_offset = 0;
+        c->is_final = true;
+        return;
+    }
+
+    // Resume on the row the source is positioned at: at its next byte offset
+    // when the value is still being fragmented, at its start otherwise.
+    c->next_dbv = sqlite3_column_int64(c->src, 5);
+    c->next_seq = sqlite3_column_int64(c->src, 8);
+    c->next_frag_offset = c->frag_active ? c->frag_offset : 0;
+    c->is_final = false;
+}
+
+// Builds the next chunk and, when one was produced, records its resume point.
+// Returns the build_next result.
+static int payload_chunks_advance(cloudsync_payload_chunks_cursor *c) {
+    int build_rc = payload_chunks_build_next(c);
+    if (build_rc != SQLITE_OK) return build_rc;
+
+    if (!c->eof) {
+        payload_chunks_set_next_cursor(c);
+    }
+    return SQLITE_OK;
+}
+
+// xFilter: (re)starts a scan. Reads the hidden-column inputs selected by
+// xBestIndex (idxnum bit k set <=> input k is present in argv, in that order),
+// prepares the source SELECT over cloudsync_changes and builds the first chunk.
+//
+// Inputs and defaults:
+//   since   (bit0) exclusive lower db_version bound; defaults to the stored
+//                  CLOUDSYNC_KEY_SEND_DBVERSION setting
+//   site_id (bit1) site filter; NULL or absent means the local site
+//   until   (bit2) inclusive upper bound; 0 or absent means MAX(db_version) for
+//                  the site filter
+//   exclude (bit3) invert the site filter; requires a non-NULL site_id
+//   resume_db_version/resume_seq/resume_frag_offset (bit4..6) positional cursor
+//
+// Returns SQLITE_OK, SQLITE_ERROR for an invalid exclude request, SQLITE_NOMEM,
+// or the error of any failing prepare/bind/step. Unlike a best-effort lookup,
+// a failing watermark query is reported instead of silently scanning an empty
+// window.
+static int payload_chunks_filter(sqlite3_vtab_cursor *cursor, int idxnum, const char *idxstr, int argc, sqlite3_value **argv) {
+    UNUSED_PARAMETER(idxstr); UNUSED_PARAMETER(argc);
+    cloudsync_payload_chunks_cursor *c = (cloudsync_payload_chunks_cursor *)cursor;
+    cloudsync_context *data = c->vtab->data;
+    int rc = SQLITE_OK;
+    if (c->src) { sqlite3_finalize(c->src); c->src = NULL; }
+    if (c->payload) { cloudsync_memory_free(c->payload); c->payload = NULL; }
+    // Contract: all per-scan state that can be bulk-reset here must live at or
+    // after eof. Fields before eof are cursor lifetime state preserved across
+    // xFilter calls.
+    memset(&c->eof, 0, sizeof(*c) - offsetof(cloudsync_payload_chunks_cursor, eof));
+
+    int argi = 0;
+    int64_t since = dbutils_settings_get_int64_value(data, CLOUDSYNC_KEY_SEND_DBVERSION);
+    const void *site_id = NULL;
+    int site_id_len = 0;
+    bool site_id_given = false;
+    int64_t until = 0;
+    bool exclude = false;
+    // Positional resume cursor (cols 15..17): when resume_db_version is bound the
+    // scan starts at (resume_db_version, resume_seq) inclusive and the first chunk
+    // resumes a mid-value fragment at resume_frag_offset, instead of replaying the
+    // whole window from `since`. Lets a stateless /check page the stream with an
+    // O(1) seek per call and no spool table.
+    bool positional = false;
+    int64_t resume_dbv = 0, resume_seq = 0, resume_frag = 0;
+    if (idxnum & 1) since = sqlite3_value_int64(argv[argi++]);
+    if (idxnum & 2) {
+        if (sqlite3_value_type(argv[argi]) != SQLITE_NULL) {
+            site_id = sqlite3_value_blob(argv[argi]);
+            site_id_len = sqlite3_value_bytes(argv[argi]);
+            site_id_given = true;
+        }
+        argi++;
+    }
+    if (idxnum & 4) until = sqlite3_value_int64(argv[argi++]);
+    if (idxnum & 8) exclude = (sqlite3_value_int(argv[argi++]) != 0);
+    if (idxnum & 16) { resume_dbv = sqlite3_value_int64(argv[argi++]); positional = true; }
+    if (idxnum & 32) resume_seq = sqlite3_value_int64(argv[argi++]);
+    if (idxnum & 64) resume_frag = sqlite3_value_int64(argv[argi++]);
+
+    // Resolve the site filter:
+    //   exclude=true  -> all sites except filter_site_id (CHECK path); site required
+    //   filter given  -> only that site
+    //   default       -> local site (send path, unchanged)
+    const char *site_op;
+    if (exclude) {
+        if (!site_id_given) {
+            // Free any message left by an earlier call before installing a new
+            // one; SQLite only frees the message it finds when it reads it.
+            sqlite3_free(c->vtab->base.zErrMsg);
+            c->vtab->base.zErrMsg = sqlite3_mprintf(
+                "cloudsync_payload_chunks: exclude_filter_site_id requires a non-NULL site_id");
+            return SQLITE_ERROR;
+        }
+        site_op = "<>";
+    } else {
+        site_op = "=";
+        if (!site_id_given) { site_id = cloudsync_siteid(data); site_id_len = UUID_LEN; }
+    }
+
+    if (until == 0) {
+        char *mxsql = sqlite3_mprintf(
+            "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes WHERE site_id%s?", site_op);
+        if (!mxsql) return SQLITE_NOMEM;
+        sqlite3_stmt *mx = NULL;
+        rc = sqlite3_prepare_v2(c->vtab->db, mxsql, -1, &mx, NULL);
+        sqlite3_free(mxsql);
+        if (rc != SQLITE_OK) return rc;
+        rc = sqlite3_bind_blob(mx, 1, site_id, site_id_len, SQLITE_TRANSIENT);
+        if (rc == SQLITE_OK) {
+            rc = sqlite3_step(mx);
+            if (rc == SQLITE_ROW) { until = sqlite3_column_int64(mx, 0); rc = SQLITE_OK; }
+            else if (rc == SQLITE_DONE) rc = SQLITE_OK;
+        }
+        sqlite3_finalize(mx);
+        // A failed bind/step must not be mistaken for "no changes" (until == 0).
+        if (rc != SQLITE_OK) return rc;
+    }
+    c->watermark = until;
+
+    // Window upper bound is always `until`. The lower bound is either the legacy
+    // exclusive `since` (db_version > since) or the inclusive positional cursor
+    // (db_version, seq) >= (resume_dbv, resume_seq).
+    char *sql;
+    if (positional) {
+        sql = sqlite3_mprintf(
+            "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+            "FROM cloudsync_changes WHERE db_version<=? AND site_id%s? AND "
+            "(db_version>? OR (db_version=? AND seq>=?)) ORDER BY db_version, seq ASC",
+            site_op);
+    } else {
+        sql = sqlite3_mprintf(
+            "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+            "FROM cloudsync_changes WHERE db_version>? AND site_id%s? AND db_version<=? ORDER BY db_version, seq ASC",
+            site_op);
+    }
+    if (!sql) return SQLITE_NOMEM;
+    rc = sqlite3_prepare_v2(c->vtab->db, sql, -1, &c->src, NULL);
+    sqlite3_free(sql);
+    if (rc != SQLITE_OK) return rc;
+    // On a bind failure c->src stays owned by the cursor and is finalized by the
+    // next xFilter or by xClose.
+    if (positional) {
+        rc = sqlite3_bind_int64(c->src, 1, until);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_blob(c->src, 2, site_id, site_id_len, SQLITE_TRANSIENT);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_int64(c->src, 3, resume_dbv);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_int64(c->src, 4, resume_dbv);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_int64(c->src, 5, resume_seq);
+    } else {
+        rc = sqlite3_bind_int64(c->src, 1, since);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_blob(c->src, 2, site_id, site_id_len, SQLITE_TRANSIENT);
+        if (rc == SQLITE_OK) rc = sqlite3_bind_int64(c->src, 3, until);
+    }
+    if (rc != SQLITE_OK) return rc;
+    rc = payload_chunks_step_source(c);
+    if (rc != SQLITE_OK) return rc;
+    // Resuming inside a value that was fragmented across chunks: the first row is
+    // that value; re-establish the fragment plan and skip to resume_frag.
+    if (positional && resume_frag > 0 && c->has_row) {
+        rc = payload_chunks_begin_fragment(c, resume_frag);
+        if (rc != SQLITE_OK) return rc;
+    }
+    return payload_chunks_advance(c);
+}
+
+// xNext: moves the cursor to the following chunk.
+static int payload_chunks_next(sqlite3_vtab_cursor *cursor) {
+    cloudsync_payload_chunks_cursor *cur = (cloudsync_payload_chunks_cursor *)cursor;
+    return payload_chunks_advance(cur);
+}
+
+// xEof: non-zero once the last build attempt produced no chunk.
+static int payload_chunks_eof(sqlite3_vtab_cursor *cursor) {
+    const cloudsync_payload_chunks_cursor *cur = (const cloudsync_payload_chunks_cursor *)cursor;
+    return cur->eof ? 1 : 0;
+}
+
+// xColumn: reports one column of the chunk currently held by the cursor.
+//   0 payload blob        1 zero-based chunk index   2 payload_size   3 rows
+//   4 db_version_min      5 db_version_max           6 watermark
+//   11 next_db_version    12 next_seq                13 next_frag_offset
+//   14 is_final (0/1)
+// Every other column (the hidden inputs) reads as NULL.
+static int payload_chunks_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int col) {
+    cloudsync_payload_chunks_cursor *cur = (cloudsync_payload_chunks_cursor *)cursor;
+
+    // Columns that are not plain int64 fields.
+    if (col == 0) {
+        sqlite3_result_blob64(ctx, cur->payload, (sqlite3_uint64)cur->payload_size, SQLITE_TRANSIENT);
+        return SQLITE_OK;
+    }
+    if (col == 1) {
+        sqlite3_result_int(ctx, cur->chunk_index - 1);
+        return SQLITE_OK;
+    }
+    if (col == 14) {
+        sqlite3_result_int(ctx, cur->is_final ? 1 : 0);
+        return SQLITE_OK;
+    }
+
+    // Everything else is either an int64 field or NULL.
+    const int64_t *field = NULL;
+    switch (col) {
+        case 2:  field = &cur->payload_size; break;
+        case 3:  field = &cur->rows; break;
+        case 4:  field = &cur->dbv_min; break;
+        case 5:  field = &cur->dbv_max; break;
+        case 6:  field = &cur->watermark; break;
+        case 11: field = &cur->next_dbv; break;
+        case 12: field = &cur->next_seq; break;
+        case 13: field = &cur->next_frag_offset; break;
+        default: break;
+    }
+
+    if (field != NULL) {
+        sqlite3_result_int64(ctx, *field);
+    } else {
+        sqlite3_result_null(ctx);
+    }
+    return SQLITE_OK;
+}
+
+// xRowid: the rowid is the number of chunks built so far in this scan, i.e. the
+// chunk_index column plus one.
+static int payload_chunks_rowid(sqlite3_vtab_cursor *cursor, sqlite3_int64 *rowid) {
+    const cloudsync_payload_chunks_cursor *cur = (const cloudsync_payload_chunks_cursor *)cursor;
+    *rowid = cur->chunk_index;
+    return SQLITE_OK;
+}
+
+// Method table of the cloudsync_payload_chunks virtual table. Only the methods
+// needed for a read-only scan are provided; every member not listed below
+// (xDestroy, xUpdate, the transaction/savepoint hooks, xFindFunction, xRename,
+// xShadowName, xIntegrity) is left NULL by the designated initializer.
+static sqlite3_module cloudsync_payload_chunks_module = {
+    .iVersion    = 0,
+    .xCreate     = NULL,    // no xCreate: the table is only reachable through xConnect
+    .xConnect    = payload_chunks_connect,
+    .xBestIndex  = payload_chunks_best_index,
+    .xDisconnect = payload_chunks_disconnect,
+    .xOpen       = payload_chunks_open,
+    .xClose      = payload_chunks_close,
+    .xFilter     = payload_chunks_filter,
+    .xNext       = payload_chunks_next,
+    .xEof        = payload_chunks_eof,
+    .xColumn     = payload_chunks_column,
+    .xRowid      = payload_chunks_rowid
+};
+
+// Adds `value` to `*acc` with overflow detection. Returns SQLITE_TOOBIG, leaving
+// `*acc` untouched, when `value` is negative or the sum would exceed INT64_MAX.
+static int payload_estimated_size_add(sqlite3_int64 *acc, sqlite3_int64 value) {
+    if (value < 0) return SQLITE_TOOBIG;
+
+    sqlite3_int64 headroom = INT64_MAX - value;
+    if (*acc > headroom) return SQLITE_TOOBIG;
+
+    *acc = *acc + value;
+    return SQLITE_OK;
+}
+
+// Estimates the encoded size of the payload that would carry every change after
+// (since, since_seq) for the given site filter, without building it.
+//
+//   site_id/site_id_len  site to match, or to exclude when `exclude` is true
+//   estimated            out: 0 when no row qualifies, otherwise the payload
+//                        header size plus the pk_encode_size of each row
+//   watermark            optional out: MAX(db_version) for the site filter,
+//                        used as the inclusive upper bound of the window
+//
+// Returns SQLITE_OK, SQLITE_NOMEM, SQLITE_TOOBIG when a size cannot be
+// represented or the sum overflows, or the failing SQLite result code.
+static int payload_blob_checked_estimate(sqlite3 *db, const void *site_id, int site_id_len,
+                                         sqlite3_int64 since, sqlite3_int64 since_seq,
+                                         bool exclude, sqlite3_int64 *estimated,
+                                         sqlite3_int64 *watermark) {
+    const char *cmp = exclude ? "<>" : "=";
+    sqlite3_stmt *max_stmt = NULL;
+    sqlite3_stmt *rows_stmt = NULL;
+    sqlite3_int64 upper = 0;
+    size_t header_bytes = 0;
+    bool header_counted = false;
+    int rc = SQLITE_OK;
+
+    *estimated = 0;
+
+    // Upper bound of the window: the highest db_version visible to the filter.
+    char *max_sql = sqlite3_mprintf(
+        "SELECT COALESCE(MAX(db_version),0) FROM cloudsync_changes WHERE site_id%s?", cmp);
+    if (max_sql == NULL) return SQLITE_NOMEM;
+    rc = sqlite3_prepare_v2(db, max_sql, -1, &max_stmt, NULL);
+    sqlite3_free(max_sql);
+    if (rc != SQLITE_OK) goto fail;
+    sqlite3_bind_blob(max_stmt, 1, site_id, site_id_len, SQLITE_TRANSIENT);
+    rc = sqlite3_step(max_stmt);
+    if (rc == SQLITE_ROW) {
+        upper = sqlite3_column_int64(max_stmt, 0);
+    } else if (rc != SQLITE_DONE) {
+        goto fail;
+    }
+    sqlite3_finalize(max_stmt);
+    max_stmt = NULL;
+    if (watermark != NULL) *watermark = upper;
+
+    // Nothing newer than the caller's position: the estimate stays 0.
+    if (upper < since) return SQLITE_OK;
+
+    char *rows_sql = sqlite3_mprintf(
+        "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+        "FROM cloudsync_changes WHERE (db_version>? OR (db_version=? AND seq>?)) "
+        "AND site_id%s? AND db_version<=? ORDER BY db_version, seq ASC",
+        cmp);
+    if (rows_sql == NULL) return SQLITE_NOMEM;
+    rc = sqlite3_prepare_v2(db, rows_sql, -1, &rows_stmt, NULL);
+    sqlite3_free(rows_sql);
+    if (rc != SQLITE_OK) goto fail;
+    sqlite3_bind_int64(rows_stmt, 1, since);
+    sqlite3_bind_int64(rows_stmt, 2, since);
+    sqlite3_bind_int64(rows_stmt, 3, since_seq);
+    sqlite3_bind_blob(rows_stmt, 4, site_id, site_id_len, SQLITE_TRANSIENT);
+    sqlite3_bind_int64(rows_stmt, 5, upper);
+
+    cloudsync_payload_context_size(&header_bytes);
+    for (;;) {
+        rc = sqlite3_step(rows_stmt);
+        if (rc != SQLITE_ROW) break;
+
+        sqlite3_value *rowv[9];
+        for (int i = 0; i < 9; ++i) rowv[i] = sqlite3_column_value(rows_stmt, i);
+        size_t row_bytes = pk_encode_size((dbvalue_t **)rowv, 9, 0, -1);
+        if (row_bytes == SIZE_MAX || row_bytes > (size_t)INT64_MAX) {
+            rc = SQLITE_TOOBIG;
+            goto fail;
+        }
+
+        // The payload header is charged once, and only if there is a row at all.
+        if (!header_counted) {
+            if (header_bytes > (size_t)INT64_MAX) {
+                rc = SQLITE_TOOBIG;
+                goto fail;
+            }
+            rc = payload_estimated_size_add(estimated, (sqlite3_int64)header_bytes);
+            if (rc != SQLITE_OK) goto fail;
+            header_counted = true;
+        }
+
+        rc = payload_estimated_size_add(estimated, (sqlite3_int64)row_bytes);
+        if (rc != SQLITE_OK) goto fail;
+    }
+    if (rc != SQLITE_DONE) goto fail;
+
+    sqlite3_finalize(rows_stmt);
+    return SQLITE_OK;
+
+fail:
+    if (rows_stmt) sqlite3_finalize(rows_stmt);
+    if (max_stmt) sqlite3_finalize(max_stmt);
+    return rc;
+}
+
+// cloudsync_payload_blob_checked(since, since_seq, site_id, exclude, max_size)
+//
+// Returns, as a single blob, the encoded payload of every change after
+// (since, since_seq) for the site filter, but only when its estimated size
+// does not exceed max_size:
+//   since, since_seq  NULL is read as 0
+//   site_id           NULL means the local site; required when exclude is set
+//   exclude           non-zero inverts the site filter
+//   max_size          must be a positive integer
+// Result: the payload blob, NULL when there is nothing to send, or an error
+// (prefixed with CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE when the estimate exceeds
+// max_size or cannot be represented).
+void dbsync_payload_blob_checked(sqlite3_context *context, int argc, sqlite3_value **argv) {
+    DEBUG_FUNCTION("cloudsync_payload_blob_checked");
+    UNUSED_PARAMETER(argc);
+
+    cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
+    sqlite3 *db = sqlite3_context_db_handle(context);
+    sqlite3_stmt *rows = NULL;
+    cloudsync_payload_context *enc = NULL;
+    sqlite3_int64 estimated = 0;
+    sqlite3_int64 watermark = 0;
+    int64_t blob_size = 0;
+    char *blob = NULL;
+    char *sql = NULL;
+    int rc = SQLITE_OK;
+
+    // ---- arguments -------------------------------------------------------
+    sqlite3_int64 since = 0;
+    if (sqlite3_value_type(argv[0]) != SQLITE_NULL) since = sqlite3_value_int64(argv[0]);
+
+    sqlite3_int64 since_seq = 0;
+    if (sqlite3_value_type(argv[1]) != SQLITE_NULL) since_seq = sqlite3_value_int64(argv[1]);
+
+    const void *site_id = NULL;
+    int site_id_len = 0;
+    if (sqlite3_value_type(argv[2]) != SQLITE_NULL) {
+        site_id = sqlite3_value_blob(argv[2]);
+        site_id_len = sqlite3_value_bytes(argv[2]);
+    }
+
+    bool exclude = (sqlite3_value_type(argv[3]) != SQLITE_NULL) && (sqlite3_value_int(argv[3]) != 0);
+
+    sqlite3_int64 max_estimated_size = 0;
+    if (sqlite3_value_type(argv[4]) != SQLITE_NULL) max_estimated_size = sqlite3_value_int64(argv[4]);
+    if (max_estimated_size <= 0) {
+        sqlite3_result_error(context, "cloudsync_payload_blob_checked: max_estimated_payload_size must be positive", -1);
+        return;
+    }
+
+    if (site_id == NULL) {
+        if (exclude) {
+            sqlite3_result_error(context,
+                "cloudsync_payload_blob_checked: exclude_filter_site_id requires a non-NULL site_id", -1);
+            return;
+        }
+        // No filter given: default to the local site.
+        site_id = cloudsync_siteid(data);
+        site_id_len = UUID_LEN;
+    }
+
+    // ---- size check ------------------------------------------------------
+    rc = payload_blob_checked_estimate(db, site_id, site_id_len, since, since_seq, exclude, &estimated, &watermark);
+    if (rc != SQLITE_OK) goto error;
+    if (estimated == 0) {
+        sqlite3_result_null(context);
+        return;
+    }
+    if (estimated > max_estimated_size) {
+        dbsync_set_error(context,
+            CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_payload_blob_checked: estimated payload size %" PRId64 " exceeds max_estimated_payload_size %" PRId64,
+            (int64_t)estimated, (int64_t)max_estimated_size);
+        return;
+    }
+
+    // ---- encode the same window, bounded by the watermark just measured ---
+    sql = sqlite3_mprintf(
+        "SELECT tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq "
+        "FROM cloudsync_changes WHERE (db_version>? OR (db_version=? AND seq>?)) "
+        "AND site_id%s? AND db_version<=? ORDER BY db_version, seq ASC",
+        exclude ? "<>" : "=");
+    if (sql == NULL) { rc = SQLITE_NOMEM; goto error; }
+    rc = sqlite3_prepare_v2(db, sql, -1, &rows, NULL);
+    sqlite3_free(sql);
+    if (rc != SQLITE_OK) goto error;
+    sqlite3_bind_int64(rows, 1, since);
+    sqlite3_bind_int64(rows, 2, since);
+    sqlite3_bind_int64(rows, 3, since_seq);
+    sqlite3_bind_blob(rows, 4, site_id, site_id_len, SQLITE_TRANSIENT);
+    sqlite3_bind_int64(rows, 5, watermark);
+
+    enc = cloudsync_memory_zeroalloc((uint64_t)cloudsync_payload_context_size(NULL));
+    if (enc == NULL) { rc = SQLITE_NOMEM; goto error; }
+
+    for (;;) {
+        rc = sqlite3_step(rows);
+        if (rc != SQLITE_ROW) break;
+
+        sqlite3_value *rowv[9];
+        for (int i = 0; i < 9; ++i) rowv[i] = sqlite3_column_value(rows, i);
+        rc = cloudsync_payload_encode_step(enc, data, 9, (dbvalue_t **)rowv);
+        if (rc != SQLITE_OK) goto error;
+    }
+    if (rc != SQLITE_DONE) goto error;
+    sqlite3_finalize(rows);
+    rows = NULL;
+
+    rc = cloudsync_payload_encode_final(enc, data);
+    if (rc != SQLITE_OK) goto error;
+
+    blob = cloudsync_payload_blob(enc, &blob_size, NULL);
+    if (blob != NULL) {
+        // SQLite takes ownership of the blob; only the context struct is freed.
+        sqlite3_result_blob64(context, blob, (sqlite3_uint64)blob_size, cloudsync_memory_free);
+        cloudsync_memory_free(enc);
+    } else {
+        sqlite3_result_null(context);
+        cloudsync_payload_context_free(enc);
+    }
+    return;
+
+error:
+    if (rows) sqlite3_finalize(rows);
+    if (enc) cloudsync_payload_context_free(enc);
+    switch (rc) {
+        case SQLITE_NOMEM:
+            sqlite3_result_error_nomem(context);
+            break;
+        case SQLITE_TOOBIG:
+            sqlite3_result_error(context, CLOUDSYNC_ERRCODE_PAYLOAD_TOO_LARGE "cloudsync_payload_blob_checked: payload estimate is too large", -1);
+            break;
+        default:
+            sqlite3_result_error(context, sqlite3_errmsg(db), -1);
+            break;
+    }
+}
+
#ifdef CLOUDSYNC_DESKTOP_OS
void dbsync_payload_save (sqlite3_context *context, int argc, sqlite3_value **argv) {
DEBUG_FUNCTION("dbsync_payload_save");
@@ -1131,7 +1836,9 @@ void dbsync_payload_load (sqlite3_context *context, int argc, sqlite3_value **ar
int nrows = 0;
cloudsync_context *data = (cloudsync_context *)sqlite3_user_data(context);
- int rc = cloudsync_payload_apply (data, payload, (int)payload_size, &nrows);
+ // File-based load applies a complete monolithic payload: legacy last-applied
+ // checkpoint (ends on a db_version boundary, so it is safe).
+ int rc = cloudsync_payload_apply (data, payload, (int)payload_size, &nrows, CLOUDSYNC_CHECKPOINT_LAST_APPLIED, 0);
if (payload) cloudsync_memory_free(payload);
if (rc != SQLITE_OK) {
@@ -1442,7 +2149,14 @@ int dbsync_register_functions (sqlite3 *db, char **pzErrMsg) {
rc = dbsync_register_function(db, "cloudsync_uuid", dbsync_uuid, 0, pzErrMsg, ctx, NULL);
if (rc != SQLITE_OK) return rc;
-
+
+ rc = dbsync_register_function(db, "cloudsync_uuid_text", dbsync_uuid_text, 1, pzErrMsg, ctx, NULL);
+ if (rc != SQLITE_OK) return rc;
+ rc = dbsync_register_function(db, "cloudsync_uuid_text", dbsync_uuid_text, 2, pzErrMsg, ctx, NULL);
+ if (rc != SQLITE_OK) return rc;
+ rc = dbsync_register_function(db, "cloudsync_uuid_blob", dbsync_uuid_blob, 1, pzErrMsg, ctx, NULL);
+ if (rc != SQLITE_OK) return rc;
+
// PAYLOAD
rc = dbsync_register_aggregate(db, "cloudsync_payload_encode", dbsync_payload_encode_step, dbsync_payload_encode_final, -1, pzErrMsg, ctx, NULL);
if (rc != SQLITE_OK) return rc;
@@ -1452,7 +2166,13 @@ int dbsync_register_functions (sqlite3 *db, char **pzErrMsg) {
if (rc != SQLITE_OK) return rc;
rc = dbsync_register_function(db, "cloudsync_payload_apply", dbsync_payload_decode, -1, pzErrMsg, ctx, NULL);
if (rc != SQLITE_OK) return rc;
-
+
+ rc = sqlite3_create_module(db, "cloudsync_payload_chunks", &cloudsync_payload_chunks_module, (void *)ctx);
+ if (rc != SQLITE_OK) return rc;
+
+ rc = dbsync_register_function(db, "cloudsync_payload_blob_checked", dbsync_payload_blob_checked, 5, pzErrMsg, ctx, NULL);
+ if (rc != SQLITE_OK) return rc;
+
#ifdef CLOUDSYNC_DESKTOP_OS
rc = dbsync_register_function(db, "cloudsync_payload_save", dbsync_payload_save, 1, pzErrMsg, ctx, NULL);
if (rc != SQLITE_OK) return rc;
diff --git a/src/sqlite/sql_sqlite.c b/src/sqlite/sql_sqlite.c
index 471ae9be..a6b1d7ac 100644
--- a/src/sqlite/sql_sqlite.c
+++ b/src/sqlite/sql_sqlite.c
@@ -65,7 +65,8 @@ const char * const SQL_SETTINGS_CLEANUP_DROP_ALL =
"DROP TABLE IF EXISTS cloudsync_settings; "
"DROP TABLE IF EXISTS cloudsync_site_id; "
"DROP TABLE IF EXISTS cloudsync_table_settings; "
- "DROP TABLE IF EXISTS cloudsync_schema_versions; ";
+ "DROP TABLE IF EXISTS cloudsync_schema_versions; "
+ "DROP TABLE IF EXISTS cloudsync_payload_fragments; ";
// MARK: CloudSync
@@ -280,6 +281,37 @@ const char * const SQL_CHANGES_INSERT_ROW =
"INSERT INTO cloudsync_changes(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) "
"VALUES (?,?,?,?,?,?,?,?,?);";
+const char * const SQL_PAYLOAD_FRAGMENTS_CREATE_TABLE = // table holding one row per (value_id, part_index) fragment plus the change metadata
+ "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ("
+ "value_id TEXT NOT NULL, part_index INTEGER NOT NULL, part_count INTEGER NOT NULL, total_size INTEGER NOT NULL, "
+ "checksum TEXT NOT NULL, created_at INTEGER NOT NULL DEFAULT (unixepoch()), "
+ "tbl TEXT NOT NULL, pk BLOB NOT NULL, col_name TEXT NOT NULL, col_version INTEGER NOT NULL, db_version INTEGER NOT NULL, "
+ "site_id BLOB NOT NULL, cl INTEGER NOT NULL, seq INTEGER NOT NULL, fragment BLOB NOT NULL, "
+ "PRIMARY KEY(value_id, part_index)) WITHOUT ROWID;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_UPSERT = // insert or replace one fragment row; binds all 15 columns, created_at included
+ "INSERT OR REPLACE INTO cloudsync_payload_fragments "
+ "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) "
+ "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_COUNT = // per-value_id row count and min/max of part_count, total_size, checksum, part_index (presumably a completeness/consistency check - confirm at the call site)
+ "SELECT COUNT(*), MIN(part_count), MAX(part_count), MIN(total_size), MAX(total_size), "
+ "MIN(checksum), MAX(checksum), MIN(part_index), MAX(part_index) "
+ "FROM cloudsync_payload_fragments WHERE value_id=?;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_SELECT = // all fragments of one value_id, in part_index order
+ "SELECT fragment, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, checksum "
+ "FROM cloudsync_payload_fragments WHERE value_id=? ORDER BY part_index ASC;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_DELETE = // remove every fragment of one value_id
+ "DELETE FROM cloudsync_payload_fragments WHERE value_id=?;";
+
+const char * const SQL_PAYLOAD_FRAGMENTS_CLEANUP_STALE = // delete rows older than the bound created_at cutoff whose value_id has fewer rows than its MAX(part_count)
+ "DELETE FROM cloudsync_payload_fragments "
+ "WHERE created_at < ? AND value_id IN ("
+ "SELECT value_id FROM cloudsync_payload_fragments GROUP BY value_id "
+ "HAVING COUNT(*) < MAX(part_count));";
+
// MARK: Blocks (block-level LWW)
const char * const SQL_BLOCKS_CREATE_TABLE =
diff --git a/src/utils.c b/src/utils.c
index fff6cdd2..371e29c9 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -112,11 +112,40 @@ char *cloudsync_uuid_v7_stringify (uint8_t uuid[UUID_LEN], char value[UUID_STR_M
char *cloudsync_uuid_v7_string (char value[UUID_STR_MAXLEN], bool dash_format) {
uint8_t uuid[UUID_LEN];
-
+
if (cloudsync_uuid_v7(uuid) != 0) return NULL;
return cloudsync_uuid_v7_stringify(uuid, value, dash_format);
}
+// Decode a single hexadecimal digit (upper or lower case) to its value 0..15.
+// Any other character yields -1.
+static int cloudsync_hex_nibble (char c) {
+    int value = -1;
+    if ('0' <= c && c <= '9') {
+        value = c - '0';
+    } else if ('a' <= c && c <= 'f') {
+        value = 10 + (c - 'a');
+    } else if ('A' <= c && c <= 'F') {
+        value = 10 + (c - 'A');
+    }
+    return value;
+}
+
+// Parse a textual UUID into its UUID_LEN-byte binary form.
+//
+//   str - UUID text; it need not be NUL-terminated when len >= 0.
+//   len - number of characters to parse, or a negative value to use strlen(str).
+//   out - receives UUID_LEN bytes on success; left untouched on failure.
+//
+// Accepted forms (hex digits in either case):
+//   - bare:   32 hex digits
+//   - dashed: 8-4-4-4-12 hex digits, i.e. 36 characters with '-' at offsets
+//             8, 13, 18 and 23
+// Anything else (wrong length, stray or misplaced dash, non-hex character) is
+// rejected. Returns 0 on success, -1 on malformed input or NULL arguments.
+int cloudsync_uuid_v7_parse (const char *str, int len, uint8_t out[UUID_LEN]) {
+    if (!str || !out) return -1;
+    if (len < 0) len = (int)strlen(str);
+
+    // Only the two canonical lengths are valid; checking the length up front also
+    // bounds every read performed by the loop below.
+    const int bare_len = UUID_LEN * 2;
+    const int dashed_len = bare_len + 4;
+    if (len != bare_len && len != dashed_len) return -1;
+    const int dashed = (len == dashed_len);
+
+    uint8_t parsed[UUID_LEN];
+    int pos = 0;
+    for (int b = 0; b < UUID_LEN; ++b) {
+        // In the dashed form a '-' precedes bytes 4, 6, 8 and 10 (8-4-4-4-12 digits).
+        if (dashed && (b == 4 || b == 6 || b == 8 || b == 10)) {
+            if (str[pos] != '-') return -1;
+            ++pos;
+        }
+        int hi = cloudsync_hex_nibble(str[pos]);
+        int lo = cloudsync_hex_nibble(str[pos + 1]);
+        if (hi < 0 || lo < 0) return -1;
+        parsed[b] = (uint8_t)((hi << 4) | lo);
+        pos += 2;
+    }
+
+    // Commit only after the whole string validated, so a failed parse never leaves
+    // a half-written UUID in the caller's buffer.
+    memcpy(out, parsed, UUID_LEN);
+    return 0;
+}
+
int cloudsync_uuid_v7_compare (uint8_t value1[UUID_LEN], uint8_t value2[UUID_LEN]) {
// reconstruct the timestamp by reversing the bit shifts and combining the bytes
uint64_t t1 = ((uint64_t)value1[0] << 40) | ((uint64_t)value1[1] << 32) | ((uint64_t)value1[2] << 24) |
diff --git a/src/utils.h b/src/utils.h
index 3f0e0980..f71b1fe6 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -133,6 +133,10 @@ int cloudsync_uuid_v7 (uint8_t value[UUID_LEN]);
int cloudsync_uuid_v7_compare (uint8_t value1[UUID_LEN], uint8_t value2[UUID_LEN]);
char *cloudsync_uuid_v7_string (char value[UUID_STR_MAXLEN], bool dash_format);
char *cloudsync_uuid_v7_stringify (uint8_t uuid[UUID_LEN], char value[UUID_STR_MAXLEN], bool dash_format);
+// Parse a UUID string into its UUID_LEN binary form. Tolerant: accepts the
+// canonical dashed form (36 chars) and the bare 32-hex form, case-insensitive.
+//   str: text to parse; must not be NULL.
+//   len: number of characters in str, or a negative value to have the length
+//        taken with strlen(str) (str must then be NUL-terminated).
+//   out: destination for the UUID_LEN decoded bytes; do not rely on its
+//        contents when the call fails.
+// Returns 0 on success, -1 on malformed input or when str/out is NULL.
+int cloudsync_uuid_v7_parse (const char *str, int len, uint8_t out[UUID_LEN]);
uint64_t fnv1a_hash(const char *data, size_t len);
char *cloudsync_string_replace_prefix(const char *input, char *prefix, char *replacement);
diff --git a/test/chunk_bench.c b/test/chunk_bench.c
new file mode 100644
index 00000000..4321ac2f
--- /dev/null
+++ b/test/chunk_bench.c
@@ -0,0 +1,177 @@
+//
+// chunk_bench.c
+// cloudsync
+//
+// Local-only benchmark for the positional /check drain: build a window of N
+// chunks and time paging the whole window one chunk per call via the
+// (resume_db_version, resume_seq, resume_frag_offset) cursor on
+// cloudsync_payload_chunks. Reports wall time and per-chunk cost so the
+// computational growth of the drain (currently O(N^2): each resume re-scans
+// cloudsync_changes) can be tracked — e.g. to confirm a future indexed
+// (db_version, seq) seek flattens it to O(N).
+//
+// Env: CHUNK_BENCH_ROWS (default 400), CHUNK_BENCH_ROW_BYTES (default 60000),
+// CHUNK_BENCH_TXNS (default 1; rows split across this many db_versions),
+// CHUNK_BENCH_REPEATS (default 5), CHUNK_BENCH_CHUNK_SIZE (default 262144).
+//
+
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "sqlite3.h"
+
+#define DB_PATH "dist/chunk-bench.sqlite"
+#define EXT_PATH "./dist/cloudsync"
+
+// Current CLOCK_MONOTONIC reading expressed in (fractional) milliseconds.
+static double monotonic_ms(void) {
+    struct timespec now;
+    clock_gettime(CLOCK_MONOTONIC, &now);
+    double whole_ms = (double)now.tv_sec * 1000.0;
+    double frac_ms = (double)now.tv_nsec / 1000000.0;
+    return whole_ms + frac_ms;
+}
+
+// Read a strictly positive int from the environment variable `name`.
+// Returns dflt when the variable is unset, empty, not a plain base-10 integer,
+// non-positive, or too large to be represented as an int.
+static int env_int(const char *name, int dflt) {
+    const char *v = getenv(name);
+    if (!v || !*v) return dflt;
+
+    char *end = NULL;
+    errno = 0;
+    long p = strtol(v, &end, 10);
+    if (end == v || *end != '\0') return dflt;                  // no digits, or trailing junk
+    if (errno == ERANGE || p <= 0 || p > INT_MAX) return dflt;  // would not survive the cast below
+    return (int)p;
+}
+
+// Run `sql` through sqlite3_exec without a row callback. On failure the statement
+// text and the error message are written to stderr. Returns the sqlite3_exec
+// result code.
+static int db_exec(sqlite3 *db, const char *sql) {
+    char *errmsg = NULL;
+    const int status = sqlite3_exec(db, sql, NULL, NULL, &errmsg);
+    if (status == SQLITE_OK) return status;
+
+    // Prefer the message sqlite3_exec allocated; fall back to the connection's.
+    const char *reason = errmsg ? errmsg : sqlite3_errmsg(db);
+    fprintf(stderr, "exec failed: %s: %s\n", sql, reason);
+    sqlite3_free(errmsg);
+    return status;
+}
+
+// Drain the whole window via the positional cursor, one chunk per query.
+//
+// The first query starts at since_db_version=0 and also yields the window's
+// watermark_db_version. Every following query resumes from the
+// (next_db_version, next_seq, next_frag_offset) position reported by the previous
+// chunk, until a chunk reports is_final or the resume query returns no row.
+//
+// On success returns SQLITE_OK and stores the number of chunks read in
+// *chunks_out and the summed payload sizes in *bytes_out (both 0 for an empty
+// window). On any SQLite failure the error code is returned and the outputs are
+// left untouched.
+static int drain_positional(sqlite3 *db, int *chunks_out, long long *bytes_out) {
+    const char *first_sql =
+        "SELECT payload, next_db_version, next_seq, next_frag_offset, is_final, watermark_db_version "
+        "FROM cloudsync_payload_chunks WHERE since_db_version=0 LIMIT 1;";
+    const char *resume_sql =
+        "SELECT payload, next_db_version, next_seq, next_frag_offset, is_final "
+        "FROM cloudsync_payload_chunks "
+        "WHERE until_db_version=?1 AND resume_db_version=?2 AND resume_seq=?3 AND resume_frag_offset=?4 LIMIT 1;";
+    sqlite3_stmt *first = NULL, *resume = NULL;
+    int chunks = 0;
+    long long bytes = 0;
+    long long watermark = 0, rdbv = 0, rseq = 0, rfrag = 0;
+    bool is_final = false;
+
+    int rc = sqlite3_prepare_v2(db, first_sql, -1, &first, NULL);
+    if (rc != SQLITE_OK) goto done;
+    rc = sqlite3_prepare_v2(db, resume_sql, -1, &resume, NULL);
+    if (rc != SQLITE_OK) goto done;
+
+    rc = sqlite3_step(first);
+    if (rc == SQLITE_DONE) {
+        rc = SQLITE_OK;   // empty window: succeed with zero chunks
+        goto done;
+    }
+    if (rc != SQLITE_ROW) goto done;
+
+    bytes += sqlite3_column_bytes(first, 0);
+    rdbv = sqlite3_column_int64(first, 1);
+    rseq = sqlite3_column_int64(first, 2);
+    rfrag = sqlite3_column_int64(first, 3);
+    is_final = sqlite3_column_int(first, 4) != 0;
+    watermark = sqlite3_column_int64(first, 5);
+    chunks++;
+
+    while (!is_final) {
+        sqlite3_reset(resume);
+        sqlite3_bind_int64(resume, 1, watermark);
+        sqlite3_bind_int64(resume, 2, rdbv);
+        sqlite3_bind_int64(resume, 3, rseq);
+        sqlite3_bind_int64(resume, 4, rfrag);
+        rc = sqlite3_step(resume);
+        if (rc == SQLITE_DONE) break;       // cursor ran dry before is_final
+        if (rc != SQLITE_ROW) goto done;    // real error: report it, do not mask it
+        bytes += sqlite3_column_bytes(resume, 0);
+        rdbv = sqlite3_column_int64(resume, 1);
+        rseq = sqlite3_column_int64(resume, 2);
+        rfrag = sqlite3_column_int64(resume, 3);
+        is_final = sqlite3_column_int(resume, 4) != 0;
+        chunks++;
+    }
+    rc = SQLITE_OK;
+
+done:
+    if (rc == SQLITE_OK) {
+        *chunks_out = chunks;
+        *bytes_out = bytes;
+    }
+    // sqlite3_finalize(NULL) is a harmless no-op, so no NULL guards are needed.
+    sqlite3_finalize(first);
+    sqlite3_finalize(resume);
+    return rc;
+}
+
+// Benchmark entry point. Creates a fresh database at DB_PATH, loads the extension
+// from EXT_PATH, fills chunk_bench with random-blob rows split across
+// CHUNK_BENCH_TXNS insert statements, then times drain_positional()
+// CHUNK_BENCH_REPEATS times and prints the best/mean wall time.
+//
+// Returns 0 on success and 1 on any setup or drain failure. The database handle
+// is closed and the database file removed on every exit path.
+int main(void) {
+    int rows = env_int("CHUNK_BENCH_ROWS", 400);
+    int row_bytes = env_int("CHUNK_BENCH_ROW_BYTES", 60000);
+    int repeats = env_int("CHUNK_BENCH_REPEATS", 5);
+    int chunk_size = env_int("CHUNK_BENCH_CHUNK_SIZE", 262144);
+    int txns = env_int("CHUNK_BENCH_TXNS", 1);
+    int exit_code = 1;
+    int chunks = 0;
+    long long bytes = 0;
+    double best = 1e18, sum = 0;
+    sqlite3 *db = NULL;
+
+    remove(DB_PATH);
+    if (sqlite3_open(DB_PATH, &db) != SQLITE_OK) { fprintf(stderr, "open failed\n"); goto cleanup; }
+    if (sqlite3_enable_load_extension(db, 1) != SQLITE_OK) goto cleanup;
+    if (db_exec(db, "SELECT load_extension('" EXT_PATH "');") != SQLITE_OK) goto cleanup;
+
+    char setup[256];
+    snprintf(setup, sizeof(setup),
+             "CREATE TABLE chunk_bench (id TEXT PRIMARY KEY, body BLOB);"
+             "SELECT cloudsync_init('chunk_bench');"
+             "SELECT cloudsync_set('payload_max_chunk_size', '%d');", chunk_size);
+    if (db_exec(db, setup) != SQLITE_OK) goto cleanup;
+
+    // Split the rows across CHUNK_BENCH_TXNS transactions: each is one db_version,
+    // so TXNS=1 is the pathological single-version window and TXNS=rows is the
+    // many-versions case a real /check window resembles. Incompressible bodies keep
+    // the window many-chunked.
+    if (txns < 1) txns = 1;
+    if (txns > rows) txns = rows;
+    int idbase = 0;
+    for (int t = 0; t < txns; ++t) {
+        // Spread the remainder over the first (rows % txns) transactions.
+        int n = rows / txns + (t < rows % txns ? 1 : 0);
+        if (n <= 0) continue;
+        char insert[256];
+        snprintf(insert, sizeof(insert),
+                 "WITH RECURSIVE c(i) AS (SELECT %d UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+                 "INSERT INTO chunk_bench(id, body) SELECT printf('row-%%06d', i), randomblob(%d) FROM c;",
+                 idbase + 1, idbase + n, row_bytes);
+        if (db_exec(db, insert) != SQLITE_OK) goto cleanup;
+        idbase += n;
+    }
+
+    for (int r = 0; r < repeats; ++r) {
+        double t0 = monotonic_ms();
+        if (drain_positional(db, &chunks, &bytes) != SQLITE_OK) { fprintf(stderr, "positional drain failed\n"); goto cleanup; }
+        double dt = monotonic_ms() - t0;
+        sum += dt; if (dt < best) best = dt;
+    }
+
+    double mean = sum / repeats;
+    printf("\nPositional /check drain benchmark (local SQLite, no network)\n");
+    printf("rows: %d row_bytes: %d txns: %d chunk_size: %d repeats: %d\n",
+           rows, row_bytes, txns, chunk_size, repeats);
+    printf("chunks: %d payload_bytes: %lld\n", chunks, bytes);
+    printf("drain: best=%.2f ms mean=%.2f ms\n", best, mean);
+    if (chunks > 0)
+        printf("per-chunk: best=%.3f ms throughput: %.1f MB/s\n",
+               best / chunks, (double)bytes / 1024.0 / 1024.0 / (best / 1000.0));
+    exit_code = 0;
+
+cleanup:
+    // sqlite3_close(NULL) is a harmless no-op; a handle returned by a failed
+    // sqlite3_open must still be closed.
+    sqlite3_close(db);
+    remove(DB_PATH);
+    return exit_code;
+}
diff --git a/test/integration.c b/test/integration.c
index 6966fde5..fc39efb6 100644
--- a/test/integration.c
+++ b/test/integration.c
@@ -4,6 +4,7 @@
//
// Created by Gioele Cantoni on 05/06/25.
// Set INTEGRATION_TEST_OFFLINE_DATABASE_ID and INTEGRATION_TEST_DATABASE_ID environment variables before running this test.
+// Set INTEGRATION_TEST_CHUNKED_DATABASE_ID to enable the chunked-payload e2e test against an isolated remote database.
//
#include
@@ -36,6 +37,7 @@
#define DB_PATH "health-track.sqlite"
#define EXT_PATH "./dist/cloudsync"
+#define TEST_SKIPPED 100001
#define RCHECK if (rc != SQLITE_OK) goto abort_test;
#define ERROR_MSG if (rc != SQLITE_OK) printf("Error: %s\n", sqlite3_errmsg(db));
#define TERMINATE if (db) { db_exec(db, "SELECT cloudsync_terminate();"); }
@@ -51,6 +53,8 @@ typedef struct {
} value;
} expected_t;
+int open_load_ext(const char *db_path, sqlite3 **out_db);
+
static int callback(void *data, int argc, char **argv, char **names) {
expected_t *expect = (expected_t *)data;
@@ -155,6 +159,226 @@ int db_expect_str (sqlite3 *db, const char *sql, const char *expect) {
return rc;
}
+// Execute `sql`, which must produce exactly one row, and store column 0 (read as
+// int) in *out. Returns SQLITE_OK on success, the prepare error code when the
+// statement cannot be compiled, or SQLITE_ERROR when the row count is not one.
+int db_select_int (sqlite3 *db, const char *sql, int *out) {
+    sqlite3_stmt *query = NULL;
+    int status = sqlite3_prepare_v2(db, sql, -1, &query, NULL);
+    if (status != SQLITE_OK) {
+        printf("Error while preparing %s: %s\n", sql, sqlite3_errmsg(db));
+        return status;
+    }
+
+    status = sqlite3_step(query);
+    if (status == SQLITE_ROW) {
+        *out = sqlite3_column_int(query, 0);
+
+        // A second step must report completion, otherwise there was more than one row.
+        status = sqlite3_step(query);
+        if (status == SQLITE_DONE) return sqlite3_finalize(query);
+        printf("Error while executing %s: expected one row only, got rc=%d\n", sql, status);
+    } else {
+        printf("Error while executing %s: expected one row, got rc=%d\n", sql, status);
+    }
+
+    sqlite3_finalize(query);
+    return SQLITE_ERROR;
+}
+
+// Reads the receive probe of one network call from a single-row result:
+// column 0 -> *chunks (int), column 1 -> *complete (int), column 2 -> error text.
+// error_out is reset to "" up front and only filled when column 2 is non-NULL, so
+// a client-side apply failure that was swallowed into receive.error surfaces
+// instead of timing out as "not received".
+// The call must be a bare/read-only SELECT (e.g. a subquery), never
+// CREATE TABLE ... AS SELECT, or the apply path cannot open its savepoint
+// ("SQL statements in progress") and the download is silently dropped.
+// Returns SQLITE_OK on success, the prepare error code, or SQLITE_ERROR when the
+// statement does not yield exactly one row.
+int db_select_receive (sqlite3 *db, const char *sql, int *chunks, int *complete, char *error_out, int error_len) {
+    const int has_error_buf = (error_len > 0);
+    if (has_error_buf) error_out[0] = '\0';
+
+    sqlite3_stmt *query = NULL;
+    int status = sqlite3_prepare_v2(db, sql, -1, &query, NULL);
+    if (status != SQLITE_OK) {
+        printf("Error while preparing %s: %s\n", sql, sqlite3_errmsg(db));
+        return status;
+    }
+
+    status = sqlite3_step(query);
+    if (status == SQLITE_ROW) {
+        *chunks = sqlite3_column_int(query, 0);
+        *complete = sqlite3_column_int(query, 1);
+        const unsigned char *message = sqlite3_column_text(query, 2);
+        if (message && has_error_buf) snprintf(error_out, error_len, "%s", (const char *)message);
+
+        // A second step must report completion, otherwise there was more than one row.
+        status = sqlite3_step(query);
+        if (status == SQLITE_DONE) return sqlite3_finalize(query);
+        printf("Error while executing %s: expected one row only, got rc=%d\n", sql, status);
+    } else {
+        printf("Error while executing %s: expected one row, got rc=%d\n", sql, status);
+    }
+
+    sqlite3_finalize(query);
+    return SQLITE_ERROR;
+}
+
+// Assert that the single integer returned by `sql` is at least expect_min.
+// Returns SQLITE_OK when it is, SQLITE_ERROR (after printing a diagnostic) when it
+// is smaller, or the db_select_int() failure code.
+int db_expect_min (sqlite3 *db, const char *sql, int expect_min) {
+    int actual = 0;
+    int status = db_select_int(db, sql, &actual);
+    if (status != SQLITE_OK) return status;
+
+    if (actual >= expect_min) return SQLITE_OK;
+
+    printf("Error: expected %s to be >= %d, got %d\n", sql, expect_min, actual);
+    return SQLITE_ERROR;
+}
+
+// Perform one cloudsync_network_send_changes() and assert that it did not fail at
+// the protocol level: send.status is not "error" and send.lastFailure is absent
+// (the server omits it on success). This catches a server-reported apply/check
+// failure that raises no SQL error and would be invisible to a plain db_exec.
+int db_send_ok (sqlite3 *db) {
+    const char *probe_sql =
+        "SELECT (j ->> '$.send.status') <> 'error' AND (j ->> '$.send.lastFailure') IS NULL "
+        "FROM (SELECT cloudsync_network_send_changes() AS j);";
+    return db_expect_int(db, probe_sql, 1);
+}
+
+// Send, then keep polling until send.serverVersion (the server's optimistic
+// version) has caught up with send.localVersion, i.e. the change is covered with
+// no gap. This tolerates the server applying asynchronously: the first call sends,
+// later calls are no-ops that re-read the status. Sleeps delay_ms between attempts
+// and returns SQLITE_ERROR if there is no convergence within max_attempts.
+int db_send_await_converge (sqlite3 *db, int max_attempts, int delay_ms) {
+    const char *probe_sql =
+        "SELECT (j ->> '$.send.serverVersion') >= (j ->> '$.send.localVersion') "
+        "FROM (SELECT cloudsync_network_send_changes() AS j);";
+
+    int attempt = 0;
+    while (attempt < max_attempts) {
+        int caught_up = 0;
+        int status = db_select_int(db, probe_sql, &caught_up);
+        if (status != SQLITE_OK) return status;
+        if (caught_up) return SQLITE_OK;
+
+        ++attempt;
+        // No point sleeping after the final attempt.
+        if (attempt < max_attempts) sqlite3_sleep(delay_ms);
+    }
+
+    printf("Error: send did not converge (serverVersion < localVersion) after %d attempts\n", max_attempts);
+    return SQLITE_ERROR;
+}
+
+// Point `db` at the integration server and authenticate it.
+//
+// Formats the network-init statement for database_id into network_init (so the
+// caller can see or reuse the exact statement) and executes it. When
+// INTEGRATION_TEST_CLOUDSYNC_ADDRESS is set, cloudsync_network_init_custom() is
+// used with that address, otherwise cloudsync_network_init(). When
+// INTEGRATION_TEST_APIKEY is set, the key is applied afterwards with
+// cloudsync_network_set_apikey().
+//
+// Returns SQLITE_OK on success, SQLITE_ERROR when database_id is NULL or a
+// statement does not fit its buffer, otherwise the failing db_exec() code.
+int integration_network_init(sqlite3 *db, const char *database_id, char *network_init, size_t network_init_len) {
+    if (!database_id) {
+        fprintf(stderr, "Error: integration database ID not set.\n");
+        return SQLITE_ERROR;
+    }
+
+    int written;
+    const char* custom_address = getenv("INTEGRATION_TEST_CLOUDSYNC_ADDRESS");
+    if (custom_address) {
+        written = snprintf(network_init, network_init_len,
+                           "SELECT cloudsync_network_init_custom('%s', '%s');", custom_address, database_id);
+    } else {
+        written = snprintf(network_init, network_init_len,
+                           "SELECT cloudsync_network_init('%s');", database_id);
+    }
+    // Refuse to run a statement that snprintf had to cut short.
+    if (written < 0 || (size_t)written >= network_init_len) {
+        fprintf(stderr, "Error: network init statement does not fit in the provided buffer.\n");
+        return SQLITE_ERROR;
+    }
+
+    int rc = db_exec(db, network_init);
+    if (rc != SQLITE_OK) return rc;
+
+    const char* apikey = getenv("INTEGRATION_TEST_APIKEY");
+    if (apikey) {
+        char set_apikey[512];
+        written = snprintf(set_apikey, sizeof(set_apikey),
+                           "SELECT cloudsync_network_set_apikey('%s');", apikey);
+        if (written < 0 || (size_t)written >= sizeof(set_apikey)) {
+            fprintf(stderr, "Error: INTEGRATION_TEST_APIKEY is too long for the statement buffer.\n");
+            return SQLITE_ERROR;
+        }
+        rc = db_exec(db, set_apikey);
+    }
+
+    return rc;
+}
+
+// Create the chunked_payload_items table if it does not exist yet and register it
+// with cloudsync_init(). Returns the first failing db_exec() code, else SQLITE_OK.
+int test_chunked_schema_init(sqlite3 *db) {
+    int status = db_exec(db,
+        "CREATE TABLE IF NOT EXISTS chunked_payload_items ("
+        "id TEXT PRIMARY KEY NOT NULL,"
+        "body TEXT NOT NULL DEFAULT ''"
+        ");");
+    if (status == SQLITE_OK) {
+        status = db_exec(db, "SELECT cloudsync_init('chunked_payload_items');");
+    }
+    return status;
+}
+
+// Create the chunked_payload_failure_items table if it does not exist yet and
+// register it with cloudsync_init(). Returns the first failing db_exec() code,
+// else SQLITE_OK.
+int test_chunked_failure_schema_init(sqlite3 *db) {
+    int status = db_exec(db,
+        "CREATE TABLE IF NOT EXISTS chunked_payload_failure_items ("
+        "id TEXT PRIMARY KEY NOT NULL,"
+        "body TEXT NOT NULL DEFAULT ''"
+        ");");
+    if (status == SQLITE_OK) {
+        status = db_exec(db, "SELECT cloudsync_init('chunked_payload_failure_items');");
+    }
+    return status;
+}
+
+// Open an in-memory sender database and connect it to the database named by
+// INTEGRATION_TEST_CHUNKED_DATABASE_ID. Returns TEST_SKIPPED when that variable is
+// unset or empty; otherwise the result of opening / network initialisation.
+// No table is created here — the caller sets up whatever schema it needs.
+int test_chunked_sender_open(sqlite3 **sender, char *network_init, size_t network_init_len) {
+    const char* chunked_db_id = getenv("INTEGRATION_TEST_CHUNKED_DATABASE_ID");
+    if (!chunked_db_id || !*chunked_db_id) return TEST_SKIPPED;
+
+    int status = open_load_ext(":memory:", sender);
+    if (status == SQLITE_OK) {
+        status = integration_network_init(*sender, chunked_db_id, network_init, network_init_len);
+    }
+    return status;
+}
+
+// Bring up one in-memory endpoint for the chunked tests: open it with
+// open_load_ext(), create/register chunked_payload_items, then connect it to
+// database_id (including the optional API key) via integration_network_init().
+static int test_chunked_endpoint_open(sqlite3 **db, const char *database_id, char *network_init, size_t network_init_len) {
+    int rc = open_load_ext(":memory:", db);
+    if (rc != SQLITE_OK) return rc;
+    rc = test_chunked_schema_init(*db);
+    if (rc != SQLITE_OK) return rc;
+    return integration_network_init(*db, database_id, network_init, network_init_len);
+}
+
+// Open a sender/receiver pair of in-memory databases that both sync with the
+// database named by INTEGRATION_TEST_CHUNKED_DATABASE_ID. Returns TEST_SKIPPED
+// when that variable is unset or empty, SQLITE_OK when both endpoints are ready,
+// otherwise the first failure code. Handles opened before a failure are left in
+// *sender / *receiver for the caller to close (see test_chunked_pair_close).
+int test_chunked_pair_open(sqlite3 **sender, sqlite3 **receiver, char *network_init, size_t network_init_len) {
+    const char* test_db_id = getenv("INTEGRATION_TEST_CHUNKED_DATABASE_ID");
+    if (!test_db_id || !*test_db_id) {
+        return TEST_SKIPPED;
+    }
+
+    // Both endpoints go through the same helper so the receiver gets exactly the
+    // same network and API-key setup as the sender.
+    int rc = test_chunked_endpoint_open(sender, test_db_id, network_init, network_init_len);
+    if (rc != SQLITE_OK) return rc;
+
+    return test_chunked_endpoint_open(receiver, test_db_id, network_init, network_init_len);
+}
+
+// Terminate cloudsync on, and close, each non-NULL handle of a sender/receiver
+// pair (sender first). NULL handles are skipped.
+void test_chunked_pair_close(sqlite3 *sender, sqlite3 *receiver) {
+    sqlite3 *handles[2] = { sender, receiver };
+    for (int i = 0; i < 2; ++i) {
+        sqlite3 *handle = handles[i];
+        if (!handle) continue;
+        db_exec(handle, "SELECT cloudsync_terminate();");
+        sqlite3_close(handle);
+    }
+}
+
+// Terminate cloudsync on the sender and close it; a NULL handle is ignored.
+void test_chunked_sender_close(sqlite3 *sender) {
+    if (!sender) return;
+
+    db_exec(sender, "SELECT cloudsync_terminate();");
+    sqlite3_close(sender);
+}
+
int open_load_ext(const char *db_path, sqlite3 **out_db) {
sqlite3 *db = NULL;
int rc = sqlite3_open(db_path, &db);
@@ -333,7 +557,7 @@ int test_enable_disable(const char *db_path) {
rc = db_exec(db, set_apikey); RCHECK
}
- rc = db_exec(db, "SELECT cloudsync_network_send_changes();"); RCHECK
+ rc = db_send_ok(db); RCHECK
rc = db_exec(db, "SELECT cloudsync_cleanup('users');"); RCHECK
rc = db_exec(db, "SELECT cloudsync_cleanup('activities');"); RCHECK
rc = db_exec(db, "SELECT cloudsync_cleanup('workouts');"); RCHECK
@@ -368,12 +592,760 @@ int test_enable_disable(const char *db_path) {
rc = db_expect_int(db2, sql, 1); RCHECK
rc = db_exec(db2, "SELECT cloudsync_terminate();"); RCHECK
-
+
sqlite3_close(db2);
ABORT_TEST
}
+// Reproduces the spurious-gap bug in the send path: when the local db_version clock
+// has been advanced past the site's own changes — as happens when applied remote
+// changes bump the clock — the send announces only the change's own db_version range,
+// so the skipped versions stay a gap in the server's per-site coverage and
+// lastOptimisticVersion can never reach localVersion. cloudsync_db_version_next()
+// forces the jump deterministically, no second database required. Expected to FAIL
+// until the send announces the covered window [last_sent+1 .. watermark].
+//
+// db_path must name a fresh database. Requires INTEGRATION_TEST_DATABASE_ID (the
+// process exits with status 1 when it is missing).
+int test_send_gap_from_clock_hole(const char *db_path) {
+    sqlite3 *db = NULL;
+    int rc = open_load_ext(db_path, &db); RCHECK
+    rc = db_init(db); RCHECK // create users/activities/workouts (this db is fresh)
+
+    char value[UUID_STR_MAXLEN];
+    char sql[256];
+    // cloudsync_uuid_v7_string() returns NULL on failure and then leaves `value`
+    // unset, so it must not be formatted into the INSERT below.
+    if (!cloudsync_uuid_v7_string(value, true)) {
+        fprintf(stderr, "Error: unable to generate a UUID for the test row.\n");
+        rc = SQLITE_ERROR;
+        goto abort_test;
+    }
+
+    rc = db_exec(db, "SELECT cloudsync_init('users');"); RCHECK
+    rc = db_exec(db, "SELECT cloudsync_init('activities');"); RCHECK
+    rc = db_exec(db, "SELECT cloudsync_init('workouts');"); RCHECK
+
+    // Force the next local change to land at db_version 10, leaving 1..9 with no
+    // local-site change (the "hole" that merging applied remote changes would create).
+    rc = db_exec(db, "SELECT cloudsync_db_version_next(10);"); RCHECK
+
+    snprintf(sql, sizeof(sql), "INSERT INTO users (id, name) VALUES ('%s', '%s');", value, value);
+    rc = db_exec(db, sql); RCHECK
+
+    // sanity: the change really landed at db_version 10, so there is a leading hole
+    rc = db_expect_int(db, "SELECT cloudsync_db_version();", 10); RCHECK
+
+    // init network (address override and API key are handled by the shared helper)
+    char network_init[1024];
+    const char* test_db_id = getenv("INTEGRATION_TEST_DATABASE_ID");
+    if (!test_db_id) {
+        fprintf(stderr, "Error: INTEGRATION_TEST_DATABASE_ID not set.\n");
+        exit(1);
+    }
+    rc = integration_network_init(db, test_db_id, network_init, sizeof(network_init)); RCHECK
+
+    // Send, then poll until the server's optimistic version (serverVersion) reaches
+    // localVersion (10). With contiguous coverage it converges; with the gap bug it
+    // never does — serverVersion stays at 0 because db_versions 1..9 are reported
+    // missing. Polling absorbs the server's asynchronous apply.
+    rc = db_send_await_converge(db, 8, 1000); RCHECK
+
+    rc = db_exec(db, "SELECT cloudsync_cleanup('users');"); RCHECK
+    rc = db_exec(db, "SELECT cloudsync_cleanup('activities');"); RCHECK
+    rc = db_exec(db, "SELECT cloudsync_cleanup('workouts');"); RCHECK
+
+ABORT_TEST
+}
+
+// End-to-end check of the fragment path: one row whose 720000-character body is
+// larger than the 262144-byte chunk cap is inserted on the sender, sent, and then
+// polled for on the receiver (up to 30 attempts, 500 ms apart) until it arrives
+// with identical content. Returns TEST_SKIPPED when the chunked test database is
+// not configured, SQLITE_OK on success, otherwise an error code. The row is
+// deleted again (best effort) once it has been sent.
+int test_chunked_payload_paths(void) {
+    int rc = SQLITE_OK;
+    sqlite3 *sender = NULL;
+    sqlite3 *receiver = NULL;
+    char network_init[1024];
+    char row_id[UUID_STR_MAXLEN];
+    char sql[1024];
+    bool found = false;
+    bool cleanup_remote_row = false;
+
+    rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+    if (rc == TEST_SKIPPED) return TEST_SKIPPED;
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // cloudsync_uuid_v7_string() returns NULL on failure and then leaves row_id
+    // unset, so it must not be formatted into SQL.
+    if (!cloudsync_uuid_v7_string(row_id, true)) {
+        printf("Error: unable to generate a UUID for the chunked e2e row.\n");
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+    rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');");
+    if (rc != SQLITE_OK) goto cleanup;
+    snprintf(sql, sizeof(sql),
+             "INSERT INTO chunked_payload_items (id, body) "
+             "VALUES ('%s', lower(hex(zeroblob(360000))));",
+             row_id);
+    rc = db_exec(sender, sql);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // The sender must produce at least two chunks, and at least two of them must
+    // have 0x03 as the fifth payload byte.
+    // NOTE(review): 0x03 presumably marks a fragment chunk — confirm against the
+    // payload header definition.
+    rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 2);
+    if (rc != SQLITE_OK) goto cleanup;
+    rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks() WHERE hex(substr(payload,5,1))='03';", 2);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    rc = db_send_ok(sender);
+    if (rc != SQLITE_OK) goto cleanup;
+    cleanup_remote_row = true;
+
+    for (int attempt = 0; attempt < 30; ++attempt) {
+        int matches = 0;
+
+        // Exercises the deprecated cloudsync_network_check_changes() alias on purpose
+        // (backward-compatibility coverage); cloudsync_network_receive_changes() is the
+        // canonical name and is covered by the rowset and capped-drain tests.
+        rc = db_exec(receiver, "SELECT cloudsync_network_check_changes();");
+        if (rc != SQLITE_OK) goto cleanup;
+
+        snprintf(sql, sizeof(sql),
+                 "SELECT COUNT(*) FROM chunked_payload_items "
+                 "WHERE id='%s' "
+                 "AND length(body)=720000 "
+                 "AND body=lower(hex(zeroblob(360000)));",
+                 row_id);
+        rc = db_select_int(receiver, sql, &matches);
+        if (rc != SQLITE_OK) goto cleanup;
+        if (matches == 1) {
+            found = true;
+            break;
+        }
+
+        sqlite3_sleep(500);
+    }
+
+    if (!found) {
+        printf("Error: chunked e2e row %s was not received.\n", row_id);
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+
+cleanup:
+    // Best effort: remove the test row remotely; failures here do not change rc.
+    if (cleanup_remote_row && sender) {
+        snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id='%s';", row_id);
+        if (db_exec(sender, sql) == SQLITE_OK) {
+            db_exec(sender, "SELECT cloudsync_network_send_changes();");
+        }
+    }
+    test_chunked_pair_close(sender, receiver);
+    return rc;
+}
+
+// End-to-end check of the multi-chunk rowset path: 500 rows with 3200-character
+// bodies are inserted on the sender in one statement, sent, and then polled for on
+// the receiver (up to 30 attempts, 500 ms apart) until all of them arrive with
+// identical content. Returns TEST_SKIPPED when the chunked test database is not
+// configured, SQLITE_OK on success, otherwise an error code. The rows are deleted
+// again (best effort) once they have been sent.
+int test_chunked_payload_rowset_path(void) {
+    int rc = SQLITE_OK;
+    sqlite3 *sender = NULL;
+    sqlite3 *receiver = NULL;
+    char network_init[1024];
+    char batch_id[UUID_STR_MAXLEN];
+    char sql[1024];
+    bool found = false;
+    bool cleanup_remote_rows = false;
+    const int row_count = 500;
+    const int body_bytes = 1600;
+
+    rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+    if (rc == TEST_SKIPPED) return TEST_SKIPPED;
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // cloudsync_uuid_v7_string() returns NULL on failure and then leaves batch_id
+    // unset, so it must not be formatted into SQL.
+    if (!cloudsync_uuid_v7_string(batch_id, true)) {
+        printf("Error: unable to generate a UUID for the chunked rowset e2e batch.\n");
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+    rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');");
+    if (rc != SQLITE_OK) goto cleanup;
+    snprintf(sql, sizeof(sql),
+             "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+             "INSERT INTO chunked_payload_items (id, body) "
+             "SELECT '%s-' || printf('%%03d', i), lower(hex(zeroblob(%d))) FROM c;",
+             row_count, batch_id, body_bytes);
+    rc = db_exec(sender, sql);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // The sender must produce at least two chunks, none of which has 0x03 as the
+    // fifth payload byte.
+    // NOTE(review): 0x03 presumably marks a fragment chunk, i.e. this batch must
+    // split on row boundaries only — confirm against the payload header definition.
+    rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 2);
+    if (rc != SQLITE_OK) goto cleanup;
+    rc = db_expect_int(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks() WHERE hex(substr(payload,5,1))='03';", 0);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    rc = db_send_ok(sender);
+    if (rc != SQLITE_OK) goto cleanup;
+    cleanup_remote_rows = true;
+
+    for (int attempt = 0; attempt < 30; ++attempt) {
+        int matches = 0;
+
+        rc = db_exec(receiver, "SELECT cloudsync_network_receive_changes();");
+        if (rc != SQLITE_OK) goto cleanup;
+
+        snprintf(sql, sizeof(sql),
+                 "SELECT COUNT(*) FROM chunked_payload_items "
+                 "WHERE id LIKE '%s-%%' "
+                 "AND length(body)=%d "
+                 "AND body=lower(hex(zeroblob(%d)));",
+                 batch_id, body_bytes * 2, body_bytes);
+        rc = db_select_int(receiver, sql, &matches);
+        if (rc != SQLITE_OK) goto cleanup;
+        if (matches == row_count) {
+            found = true;
+            break;
+        }
+
+        sqlite3_sleep(500);
+    }
+
+    if (!found) {
+        printf("Error: chunked rowset e2e batch %s was not received.\n", batch_id);
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+
+cleanup:
+    // Best effort: remove the test rows remotely; failures here do not change rc.
+    if (cleanup_remote_rows && sender) {
+        snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id LIKE '%s-%%';", batch_id);
+        if (db_exec(sender, sql) == SQLITE_OK) {
+            db_exec(sender, "SELECT cloudsync_network_send_changes();");
+        }
+    }
+    test_chunked_pair_close(sender, receiver);
+    return rc;
+}
+
+// Verifies that a single cloudsync_network_sync() call drains an entire multi-chunk
+// download (no per-chunk extra sync() calls), and that the new receive.chunks /
+// receive.complete fields report the drain. A large batch of incompressible rows is
+// sent so the server's check response spans several pages; one sync() on the receiver
+// must pull them all and report chunks>1 with complete=true.
+//
+// Returns TEST_SKIPPED when the chunked test database is not configured, SQLITE_OK
+// on success, otherwise an error code. The rows are deleted again (best effort)
+// once they have been sent.
+int test_chunked_payload_single_sync_drain(void) {
+    int rc = SQLITE_OK;
+    sqlite3 *sender = NULL;
+    sqlite3 *receiver = NULL;
+    char network_init[1024];
+    char batch_id[UUID_STR_MAXLEN];
+    char sql[1024];
+    bool found = false;
+    bool observed_multi_chunk = false; // saw chunks>1 && complete=1 in a SINGLE sync() call
+    bool cleanup_remote_rows = false;
+    // INCOMPRESSIBLE random bodies (distinct per row) so the payload can't compress
+    // below the server's page size. Multi-page splitting is driven by the TENANT's
+    // payload_max_chunk_size (the server-side check-response page size, distinct from
+    // the client upload chunk size set below), which must be small on
+    // INTEGRATION_TEST_CHUNKED_DATABASE_ID — set it to 262144 to match the client.
+    // ~1 MB of incompressible data over a 256 KB page cap yields several pages
+    // regardless of tenant backlog, so one sync() must drain chunks>1. (zeroblob bodies
+    // would compress to ~nothing and collapse back to a single page.)
+    const int row_count = 500;
+    const int body_rand_bytes = 2048; // 4096-char bodies; ~1 MB random / ~2 MB serialized
+
+    rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+    if (rc == TEST_SKIPPED) return TEST_SKIPPED;
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // cloudsync_uuid_v7_string() returns NULL on failure and then leaves batch_id
+    // unset, so it must not be formatted into SQL.
+    if (!cloudsync_uuid_v7_string(batch_id, true)) {
+        printf("Error: unable to generate a UUID for the chunked single-sync drain batch.\n");
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+    rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');");
+    if (rc != SQLITE_OK) goto cleanup;
+    snprintf(sql, sizeof(sql),
+             "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+             "INSERT INTO chunked_payload_items (id, body) "
+             "SELECT '%s-' || printf('%%03d', i), lower(hex(randomblob(%d))) FROM c;",
+             row_count, batch_id, body_rand_bytes);
+    rc = db_exec(sender, sql);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    // Sender splits into multiple non-fragment chunks.
+    rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 2);
+    if (rc != SQLITE_OK) goto cleanup;
+
+    rc = db_send_ok(sender);
+    if (rc != SQLITE_OK) goto cleanup;
+    cleanup_remote_rows = true;
+
+    for (int attempt = 0; attempt < 40; ++attempt) {
+        int chunks = 0, complete = 0, matches = 0;
+        char recv_err[512];
+
+        // Run exactly one sync() and read both receive fields from that single call.
+        // The call must stay a read-only SELECT (here a subquery): wrapping it in
+        // CREATE TABLE ... AS SELECT would keep the outer statement stepping while the
+        // apply path tries to open its savepoint, which SQLite rejects ("SQL statements
+        // in progress") — silently leaving the download unapplied.
+        rc = db_select_receive(receiver,
+                               "SELECT j ->> '$.receive.chunks', j ->> '$.receive.complete', j ->> '$.receive.error' "
+                               "FROM (SELECT cloudsync_network_sync(250, 30) AS j);",
+                               &chunks, &complete, recv_err, sizeof(recv_err));
+        if (rc != SQLITE_OK) goto cleanup;
+        // A client-side apply error is swallowed into receive.error; fail loudly
+        // instead of looping until the "not received" timeout.
+        if (recv_err[0]) {
+            printf("Error: chunked single-sync drain batch %s reported receive.error: %s\n", batch_id, recv_err);
+            rc = SQLITE_ERROR;
+            goto cleanup;
+        }
+        if (chunks > 1 && complete == 1) observed_multi_chunk = true;
+
+        // Random bodies can't be matched by value; assert the full set arrived with
+        // each row at its expected length (content correctness is covered elsewhere).
+        snprintf(sql, sizeof(sql),
+                 "SELECT COUNT(*) FROM chunked_payload_items "
+                 "WHERE id LIKE '%s-%%' "
+                 "AND length(body)=%d;",
+                 batch_id, body_rand_bytes * 2);
+        rc = db_select_int(receiver, sql, &matches);
+        if (rc != SQLITE_OK) goto cleanup;
+        if (matches == row_count) { found = true; break; }
+
+        sqlite3_sleep(500);
+    }
+
+    if (!found) {
+        printf("Error: chunked single-sync drain batch %s was not received.\n", batch_id);
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+    if (!observed_multi_chunk) {
+        printf("Error: a single cloudsync_network_sync() did not drain a multi-chunk stream (chunks>1, complete=1) for batch %s.\n", batch_id);
+        rc = SQLITE_ERROR;
+        goto cleanup;
+    }
+
+cleanup:
+    // Best effort: remove the test rows remotely; failures here do not change rc.
+    if (cleanup_remote_rows && sender) {
+        snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id LIKE '%s-%%';", batch_id);
+        if (db_exec(sender, sql) == SQLITE_OK) {
+            db_exec(sender, "SELECT cloudsync_network_send_changes();");
+        }
+    }
+    test_chunked_pair_close(sender, receiver);
+    return rc;
+}
+
+// Integration test for the opt-in max_chunks cap on cloudsync_network_receive_changes(): each
+// receive_changes(1) call applies at most one chunk, reports complete=false while more is pending,
+// and resumes across calls (the in-memory page cursor persists) until the whole batch is received.
+int test_chunked_payload_capped_receive(void) { // returns SQLITE_OK, TEST_SKIPPED (propagated from test_chunked_pair_open) or an error code
+ int rc = SQLITE_OK;
+ sqlite3 *sender = NULL;
+ sqlite3 *receiver = NULL;
+ char network_init[1024];
+ char batch_id[UUID_STR_MAXLEN];
+ char sql[1024];
+ bool found = false; // set once the receiver holds all row_count rows of this batch at the expected body length
+ bool observed_capped_partial = false; // saw chunks==1 && complete=0 from a receive(1) call
+ bool cleanup_remote_rows = false; // set only after db_send_ok succeeds; gates the DELETE + re-send in cleanup
+ // INCOMPRESSIBLE random bodies (distinct per row) so the payload can't compress
+ // below the server's page size. Multi-page splitting is driven by the TENANT's
+ // payload_max_chunk_size (the server-side check-response page size, distinct from
+ // the client upload chunk size set below), which must be small on
+ // INTEGRATION_TEST_CHUNKED_DATABASE_ID — set it to 262144 to match the client — so
+ // the batch spans several pages and receive_changes(1) leaves a partial. (zeroblob
+ // bodies would compress away and collapse to one page.)
+ const int row_count = 500;
+ const int body_rand_bytes = 2048; // 4096-char bodies; ~1 MB random / ~2 MB serialized
+
+ rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+ if (rc == TEST_SKIPPED) return TEST_SKIPPED; // returns without passing through cleanup
+ if (rc != SQLITE_OK) goto cleanup;
+
+ cloudsync_uuid_v7_string(batch_id, true); // batch_id prefixes every row id so this run's rows can be matched with LIKE
+ rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');"); if (rc != SQLITE_OK) goto cleanup;
+ snprintf(sql, sizeof(sql),
+ "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+ "INSERT INTO chunked_payload_items (id, body) "
+ "SELECT '%s-' || printf('%%03d', i), lower(hex(randomblob(%d))) FROM c;",
+ row_count, batch_id, body_rand_bytes);
+ rc = db_exec(sender, sql); if (rc != SQLITE_OK) goto cleanup;
+
+ rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 2); if (rc != SQLITE_OK) goto cleanup; // presumably requires >= 2 local chunks (db_expect_min is defined elsewhere) — confirm
+
+ rc = db_send_ok(sender); if (rc != SQLITE_OK) goto cleanup;
+ cleanup_remote_rows = true;
+
+ for (int attempt = 0; attempt < 80; ++attempt) { // at most 80 polls, with a 300 ms sleep after each unsuccessful one
+ int chunks = 0, complete = 0, matches = 0;
+ char recv_err[512]; // NOTE(review): not initialized here; assumes db_select_receive always writes it on SQLITE_OK — confirm
+
+ // Cap each call to a single chunk; read both fields from that one call. Must
+ // stay a read-only SELECT (subquery): CREATE TABLE ... AS SELECT would block
+ // the apply savepoint ("SQL statements in progress") and silently drop the
+ // downloaded chunk.
+ rc = db_select_receive(receiver,
+ "SELECT j ->> '$.receive.chunks', j ->> '$.receive.complete', j ->> '$.receive.error' "
+ "FROM (SELECT cloudsync_network_receive_changes(1) AS j);",
+ &chunks, &complete, recv_err, sizeof(recv_err));
+ if (rc != SQLITE_OK) goto cleanup;
+ // Surface a swallowed client-side apply error instead of looping to timeout.
+ if (recv_err[0]) {
+ printf("Error: capped-receive batch %s reported receive.error: %s\n", batch_id, recv_err);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ // The cap must never apply more than one chunk per call.
+ if (chunks > 1) {
+ printf("Error: cloudsync_network_receive_changes(1) applied %d chunks (cap violated) for batch %s.\n", chunks, batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (chunks == 1 && complete == 0) observed_capped_partial = true;
+
+ // Random bodies can't be matched by value; assert the full set arrived with
+ // each row at its expected length (content correctness is covered elsewhere).
+ snprintf(sql, sizeof(sql),
+ "SELECT COUNT(*) FROM chunked_payload_items "
+ "WHERE id LIKE '%s-%%' "
+ "AND length(body)=%d;",
+ batch_id, body_rand_bytes * 2); // hex encoding doubles the byte count
+ rc = db_select_int(receiver, sql, &matches); if (rc != SQLITE_OK) goto cleanup;
+ if (matches == row_count) { found = true; break; }
+
+ sqlite3_sleep(300);
+ }
+
+ if (!found) {
+ printf("Error: capped-receive batch %s was not fully received.\n", batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (!observed_capped_partial) { // full delivery alone is not enough: at least one call must have been a capped partial
+ printf("Error: cloudsync_network_receive_changes(1) never reported a capped partial drain (chunks=1, complete=0) for batch %s.\n", batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+cleanup:
+ if (cleanup_remote_rows && sender) { // delete this batch's rows on the sender and push the deletion; the re-send's result is not checked and rc is left as is
+ snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id LIKE '%s-%%';", batch_id);
+ if (db_exec(sender, sql) == SQLITE_OK) {
+ db_exec(sender, "SELECT cloudsync_network_send_changes();");
+ }
+ }
+ test_chunked_pair_close(sender, receiver);
+ return rc;
+}
+
+// Verifies the batched cursor-spool response shape with an explicit maxChunks cap
+// greater than one. The old single-page client parser ignores data.chunks[] and
+// would report chunks=0 forever against the batched server response.
+int test_chunked_payload_batched_receive(void) { // returns SQLITE_OK, TEST_SKIPPED (propagated from test_chunked_pair_open) or an error code
+ int rc = SQLITE_OK;
+ sqlite3 *sender = NULL;
+ sqlite3 *receiver = NULL;
+ char network_init[1024];
+ char batch_id[UUID_STR_MAXLEN];
+ char sql[1024];
+ bool found = false; // set once the receiver holds all row_count rows of this batch at the expected body length
+ bool observed_batched_partial = false; // saw chunks==2 && complete=0 from receive(2)
+ bool cleanup_remote_rows = false; // set only after db_send_ok succeeds; gates the DELETE + re-send in cleanup
+ const int row_count = 500;
+ const int body_rand_bytes = 2048; // random bytes per row; stored hex-encoded, so each body is 4096 characters
+
+ rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+ if (rc == TEST_SKIPPED) return TEST_SKIPPED; // returns without passing through cleanup
+ if (rc != SQLITE_OK) goto cleanup;
+
+ cloudsync_uuid_v7_string(batch_id, true); // batch_id prefixes every row id so this run's rows can be matched with LIKE
+ rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');"); if (rc != SQLITE_OK) goto cleanup;
+ snprintf(sql, sizeof(sql),
+ "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+ "INSERT INTO chunked_payload_items (id, body) "
+ "SELECT '%s-' || printf('%%03d', i), lower(hex(randomblob(%d))) FROM c;",
+ row_count, batch_id, body_rand_bytes);
+ rc = db_exec(sender, sql); if (rc != SQLITE_OK) goto cleanup;
+
+ rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 3); if (rc != SQLITE_OK) goto cleanup; // presumably requires >= 3 local chunks so a receive(2) can leave a remainder (db_expect_min is defined elsewhere) — confirm
+
+ rc = db_send_ok(sender); if (rc != SQLITE_OK) goto cleanup;
+ cleanup_remote_rows = true;
+
+ for (int attempt = 0; attempt < 80; ++attempt) { // at most 80 polls, with a 300 ms sleep after each unsuccessful one
+ int chunks = 0, complete = 0, matches = 0;
+ char recv_err[512]; // NOTE(review): not initialized here; assumes db_select_receive always writes it on SQLITE_OK — confirm
+
+ rc = db_select_receive(receiver,
+ "SELECT j ->> '$.receive.chunks', j ->> '$.receive.complete', j ->> '$.receive.error' "
+ "FROM (SELECT cloudsync_network_receive_changes(2) AS j);",
+ &chunks, &complete, recv_err, sizeof(recv_err));
+ if (rc != SQLITE_OK) goto cleanup;
+ if (recv_err[0]) { // a non-empty receive.error fails the test immediately instead of polling until the attempts run out
+ printf("Error: batched-receive batch %s reported receive.error: %s\n", batch_id, recv_err);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (chunks > 2) { // the cap of 2 must never be exceeded by a single call
+ printf("Error: cloudsync_network_receive_changes(2) applied %d chunks (cap violated) for batch %s.\n", chunks, batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (chunks == 2 && complete == 0) observed_batched_partial = true;
+
+ snprintf(sql, sizeof(sql),
+ "SELECT COUNT(*) FROM chunked_payload_items "
+ "WHERE id LIKE '%s-%%' "
+ "AND length(body)=%d;",
+ batch_id, body_rand_bytes * 2); // hex encoding doubles the byte count
+ rc = db_select_int(receiver, sql, &matches); if (rc != SQLITE_OK) goto cleanup;
+ if (matches == row_count) { found = true; break; }
+
+ sqlite3_sleep(300);
+ }
+
+ if (!found) {
+ printf("Error: batched-receive batch %s was not fully received.\n", batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (!observed_batched_partial) { // full delivery alone is not enough: at least one call must have returned exactly 2 chunks with more pending
+ printf("Error: cloudsync_network_receive_changes(2) never reported a batched partial drain (chunks=2, complete=0) for batch %s.\n", batch_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+cleanup:
+ if (cleanup_remote_rows && sender) { // delete this batch's rows on the sender and push the deletion; the re-send's result is not checked and rc is left as is
+ snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id LIKE '%s-%%';", batch_id);
+ if (db_exec(sender, sql) == SQLITE_OK) {
+ db_exec(sender, "SELECT cloudsync_network_send_changes();");
+ }
+ }
+ test_chunked_pair_close(sender, receiver);
+ return rc;
+}
+
+int test_chunked_send_failure_preserves_checkpoint(void) { // Verifies that a cloudsync_network_send_changes() call that fails leaves the stored send_dbversion checkpoint unchanged.
+ int rc = SQLITE_OK; // result: SQLITE_OK, TEST_SKIPPED (propagated from test_chunked_sender_open) or an error code
+ sqlite3 *sender = NULL;
+ char network_init[1024];
+ char batch_id[UUID_STR_MAXLEN];
+ char sql[1024];
+ char *errmsg = NULL; // allocated by sqlite3_exec on failure; released with sqlite3_free
+ int send_dbversion_before = 0;
+ int send_dbversion_after = -1; // starts different from _before, so the final comparison only passes after a real re-read
+ const int row_count = 500;
+ const int body_bytes = 1600; // zeroblob bytes per row; stored hex-encoded, so each body is 3200 characters
+
+ rc = test_chunked_sender_open(&sender, network_init, sizeof(network_init));
+ if (rc == TEST_SKIPPED) return TEST_SKIPPED; // returns without passing through cleanup
+ if (rc != SQLITE_OK) goto cleanup;
+
+ rc = test_chunked_failure_schema_init(sender); if (rc != SQLITE_OK) goto cleanup; // NOTE(review): presumably sets up chunked_payload_failure_items so the send below is rejected; defined elsewhere — confirm
+ rc = db_exec(sender, "SELECT cloudsync_set('payload_max_chunk_size', '262144');"); if (rc != SQLITE_OK) goto cleanup;
+
+ cloudsync_uuid_v7_string(batch_id, true);
+ snprintf(sql, sizeof(sql),
+ "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < %d) "
+ "INSERT INTO chunked_payload_failure_items (id, body) "
+ "SELECT '%s-' || printf('%%03d', i), lower(hex(zeroblob(%d))) FROM c;",
+ row_count, batch_id, body_bytes);
+ rc = db_exec(sender, sql); if (rc != SQLITE_OK) goto cleanup;
+
+ rc = db_expect_min(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks();", 2); if (rc != SQLITE_OK) goto cleanup; // presumably requires >= 2 local chunks (helper defined elsewhere) — confirm
+ rc = db_expect_int(sender, "SELECT COUNT(*) FROM cloudsync_payload_chunks() WHERE hex(substr(payload,5,1))='03';", 0); if (rc != SQLITE_OK) goto cleanup; // presumably requires that no chunk has 0x03 as its fifth payload byte — confirm
+
+ rc = db_select_int(sender,
+ "SELECT CAST(COALESCE((SELECT value FROM cloudsync_settings WHERE key='send_dbversion'), '0') AS INTEGER);",
+ &send_dbversion_before); // a missing send_dbversion setting reads as 0
+ if (rc != SQLITE_OK) goto cleanup;
+
+ rc = sqlite3_exec(sender, "SELECT cloudsync_network_send_changes();", NULL, NULL, &errmsg); // called directly; the error text is discarded below without being inspected
+ if (rc == SQLITE_OK) { // success is the failure case here: the send is expected to be rejected
+ printf("Error: chunked send failure test expected cloudsync_network_send_changes to fail.\n");
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (errmsg) {
+ sqlite3_free(errmsg);
+ errmsg = NULL; // cleared so the cleanup path does not free it a second time
+ }
+
+ rc = db_select_int(sender,
+ "SELECT CAST(COALESCE((SELECT value FROM cloudsync_settings WHERE key='send_dbversion'), '0') AS INTEGER);",
+ &send_dbversion_after);
+ if (rc != SQLITE_OK) goto cleanup;
+
+ if (send_dbversion_after != send_dbversion_before) { // any change to the checkpoint after a failed send fails the test
+ printf("Error: send_dbversion advanced after failed chunked send (before=%d after=%d).\n",
+ send_dbversion_before, send_dbversion_after);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+ rc = SQLITE_OK; // rc is already SQLITE_OK here (checked after the last db_select_int); kept as an explicit success marker
+
+cleanup:
+ if (errmsg) sqlite3_free(errmsg);
+ test_chunked_sender_close(sender);
+ return rc;
+}
+
+// Regression test for the chunked-check stale negative-cache bug
+// (see cloudsync/docs/chunked-check-negative-cache.md). A receiver that drains
+// the tenant to empty must not be permanently pinned to "up to date": an empty
+// /check result advances no cursor, so the receiver keeps polling the same
+// (dbVersion, seq) key. A server that *caches* that empty chunk at the key would
+// keep answering "no changes" even after another client commits changes at the
+// same key, hiding them until the artifact TTL (up to 24h) expires.
+//
+// This drives the exact sequence that exposes the bug end-to-end:
+// 1. the receiver drains the whole tenant until it is provably caught up;
+// 2. it keeps polling for a few more seconds and asserts it stays at 0 rows
+// (steady empty state — this is where the stale empty chunk would be cached);
+// 3. a second client (the sender) inserts a row and sends it at that same key;
+// 4. the receiver must observe that row on a subsequent receive. Against a
+// server that caches the negative result this never arrives and the test
+// fails; against the fixed server it is delivered.
+//
+// "Caught up" cannot be detected from row counts alone: a server that is still
+// preparing a page replies 202, which the client surfaces as receive.rows=0,
+// complete=1 — indistinguishable from a genuinely empty result. (An earlier
+// version of this test mistook three preparation 202s for "drained" and then
+// had ~71k backlog rows land during phase 2.) So we anchor on a sentinel: the
+// sender commits a marker row *after* any pre-existing backlog, and phase 1 is
+// only considered drained once the receiver has both applied that sentinel
+// (the whole backlog is therefore behind it) and then seen a 0-row poll.
+int test_chunked_negative_cache_invalidation(void) { // returns SQLITE_OK, TEST_SKIPPED (propagated from test_chunked_pair_open) or an error code
+ int rc = SQLITE_OK;
+ sqlite3 *sender = NULL;
+ sqlite3 *receiver = NULL;
+ char network_init[1024];
+ char sentinel_id[UUID_STR_MAXLEN];
+ char row_id[UUID_STR_MAXLEN];
+ char sql[1024];
+ bool drained = false; // phase 1 outcome: sentinel applied AND a 0-row poll seen
+ bool found = false; // phase 4 outcome: the post-idle row reached the receiver
+ bool cleanup_sentinel = false; // set after the sentinel was sent; cleanup then deletes it
+ bool cleanup_remote_row = false; // set after the phase 3 row was sent; cleanup then deletes it
+
+ rc = test_chunked_pair_open(&sender, &receiver, network_init, sizeof(network_init));
+ if (rc == TEST_SKIPPED) return TEST_SKIPPED; // returns without passing through cleanup
+ if (rc != SQLITE_OK) goto cleanup;
+
+ // Commit a sentinel row that sorts after any pre-existing tenant backlog, so
+ // observing it on the receiver proves the entire backlog has been drained.
+ cloudsync_uuid_v7_string(sentinel_id, true);
+ snprintf(sql, sizeof(sql),
+ "INSERT INTO chunked_payload_items (id, body) VALUES ('%s', 'negative-cache-sentinel');",
+ sentinel_id);
+ rc = db_exec(sender, sql); if (rc != SQLITE_OK) goto cleanup;
+ rc = db_send_ok(sender); if (rc != SQLITE_OK) goto cleanup;
+ cleanup_sentinel = true;
+
+ // Phase 1: drain the receiver until it is provably caught up. Each bare
+ // receive_changes() applies everything currently ready and returns at the first
+ // 202, so a large backlog is pulled across several iterations. Termination
+ // requires the sentinel to be present (backlog fully drained) AND a subsequent
+ // 0-row poll, so a mid-preparation 202 can never be mistaken for "caught up".
+ bool sentinel_seen = false;
+ for (int attempt = 0; attempt < 200 && !drained; ++attempt) { // at most 200 polls, each followed by a 300 ms sleep
+ int rows = 0, complete = 0; // complete is passed to db_select_receive but not inspected in this loop
+ char recv_err[512]; // NOTE(review): not initialized here; assumes db_select_receive always writes it on SQLITE_OK — confirm
+
+ rc = db_select_receive(receiver,
+ "SELECT j ->> '$.receive.rows', j ->> '$.receive.complete', j ->> '$.receive.error' "
+ "FROM (SELECT cloudsync_network_receive_changes() AS j);",
+ &rows, &complete, recv_err, sizeof(recv_err));
+ if (rc != SQLITE_OK) goto cleanup;
+ if (recv_err[0]) {
+ printf("Error: negative-cache drain reported receive.error: %s\n", recv_err);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+ if (!sentinel_seen) { // the lookup is skipped once the sentinel has been observed
+ int matches = 0;
+ snprintf(sql, sizeof(sql),
+ "SELECT COUNT(*) FROM chunked_payload_items WHERE id='%s';", sentinel_id);
+ rc = db_select_int(receiver, sql, &matches); if (rc != SQLITE_OK) goto cleanup;
+ if (matches == 1) sentinel_seen = true;
+ }
+
+ // Caught up only once the backlog (including the sentinel) is fully applied
+ // and a further poll delivers nothing.
+ if (sentinel_seen && rows == 0) drained = true;
+
+ sqlite3_sleep(300); // also runs once after drained is set, before the loop condition ends the loop
+ }
+
+ if (!drained) {
+ printf("Error: negative-cache receiver never caught up to the sentinel row.\n");
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+ // Phase 2: keep polling for a few seconds with the tenant idle. The receiver is
+ // caught up, so every poll must continue to report 0 rows. (This is the window
+ // where a buggy server caches the empty chunk at the receiver's key.)
+ for (int i = 0; i < 6; ++i) { // 6 polls, each followed by a 500 ms sleep
+ int rows = 0, complete = 0; // complete is passed to db_select_receive but not inspected in this loop
+ char recv_err[512];
+
+ rc = db_select_receive(receiver,
+ "SELECT j ->> '$.receive.rows', j ->> '$.receive.complete', j ->> '$.receive.error' "
+ "FROM (SELECT cloudsync_network_receive_changes() AS j);",
+ &rows, &complete, recv_err, sizeof(recv_err));
+ if (rc != SQLITE_OK) goto cleanup;
+ if (recv_err[0]) {
+ printf("Error: negative-cache idle poll reported receive.error: %s\n", recv_err);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+ if (rows != 0) { // any applied row during the idle window fails the test
+ printf("Error: negative-cache receiver applied %d unexpected rows while the tenant was idle.\n", rows);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+ sqlite3_sleep(500);
+ }
+
+ // Phase 3: a second client commits a change at the same (dbVersion, seq) key
+ // the receiver has been polling against.
+ cloudsync_uuid_v7_string(row_id, true);
+ snprintf(sql, sizeof(sql),
+ "INSERT INTO chunked_payload_items (id, body) VALUES ('%s', 'negative-cache');",
+ row_id);
+ rc = db_exec(sender, sql); if (rc != SQLITE_OK) goto cleanup;
+ rc = db_send_ok(sender); if (rc != SQLITE_OK) goto cleanup;
+ cleanup_remote_row = true;
+
+ // Phase 4: the receiver must now pick up the change on a subsequent receive.
+ // A stale negative cache would keep answering "no changes" at the unchanged key
+ // and this row would never arrive.
+ for (int attempt = 0; attempt < 60; ++attempt) { // at most 60 polls, with a 500 ms sleep after each unsuccessful one
+ int matches = 0;
+ char recv_err[512];
+ int rows = 0, complete = 0; // neither is inspected here; arrival is judged by the row lookup below
+
+ rc = db_select_receive(receiver,
+ "SELECT j ->> '$.receive.rows', j ->> '$.receive.complete', j ->> '$.receive.error' "
+ "FROM (SELECT cloudsync_network_receive_changes() AS j);",
+ &rows, &complete, recv_err, sizeof(recv_err));
+ if (rc != SQLITE_OK) goto cleanup;
+ if (recv_err[0]) {
+ printf("Error: negative-cache post-send poll reported receive.error: %s\n", recv_err);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+ snprintf(sql, sizeof(sql),
+ "SELECT COUNT(*) FROM chunked_payload_items WHERE id='%s' AND body='negative-cache';",
+ row_id);
+ rc = db_select_int(receiver, sql, &matches); if (rc != SQLITE_OK) goto cleanup;
+ if (matches == 1) { found = true; break; }
+
+ sqlite3_sleep(500);
+ }
+
+ if (!found) {
+ printf("Error: stale negative cache — receiver never received row %s after the sender committed it.\n", row_id);
+ rc = SQLITE_ERROR;
+ goto cleanup;
+ }
+
+cleanup:
+ if ((cleanup_sentinel || cleanup_remote_row) && sender) { // remove whichever test rows were sent, then push the deletions; none of these results are checked and rc is left as is
+ if (cleanup_remote_row) {
+ snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id='%s';", row_id);
+ db_exec(sender, sql);
+ }
+ if (cleanup_sentinel) {
+ snprintf(sql, sizeof(sql), "DELETE FROM chunked_payload_items WHERE id='%s';", sentinel_id);
+ db_exec(sender, sql);
+ }
+ db_exec(sender, "SELECT cloudsync_network_send_changes();");
+ }
+ test_chunked_pair_close(sender, receiver);
+ return rc;
+}
+
int test_offline_error(const char *db_path) {
sqlite3 *db = NULL;
int rc = open_load_ext(db_path, &db);
@@ -390,10 +1362,11 @@ int test_offline_error(const char *db_path) {
// Initialize network with offline database ID
const char* offline_db_id = getenv("INTEGRATION_TEST_OFFLINE_DATABASE_ID");
- if (!offline_db_id) {
- printf("Skipping offline error test: INTEGRATION_TEST_OFFLINE_DATABASE_ID not set.\n");
- rc = SQLITE_OK;
- goto abort_test;
+ if (!offline_db_id || !*offline_db_id) {
+ // Clean up the db opened above and return TEST_SKIPPED directly (going through
+ // abort_test would trip ERROR_MSG on the non-zero rc and print a spurious error).
+ if (db) { db_exec(db, "SELECT cloudsync_terminate();"); sqlite3_close(db); }
+ return TEST_SKIPPED;
}
char network_init[512];
@@ -494,7 +1467,7 @@ ABORT_TEST
// configured server-side to fail apply and check jobs. Verifies that the
// new failures.{apply,check} response shape is correctly parsed and emitted as
// send.lastFailure (cloudsync_network_send_changes) and receive.lastFailure
-// (cloudsync_network_check_changes), and that cloudsync_network_sync surfaces
+// (cloudsync_network_receive_changes), and that cloudsync_network_sync surfaces
// at least one of them.
//
// First invocation primes the server (sends data, queues a check) — server-side
@@ -505,14 +1478,12 @@ int test_failure_path (const char *db_path) {
sqlite3 *db = NULL;
const char *test_db_id = getenv("INTEGRATION_TEST_FAILURE_DATABASE_ID");
- if (!test_db_id) {
- printf("(INTEGRATION_TEST_FAILURE_DATABASE_ID not set, skipping) ");
- return SQLITE_OK;
+ if (!test_db_id || !*test_db_id) {
+ return TEST_SKIPPED;
}
const char *custom_address = getenv("INTEGRATION_TEST_CLOUDSYNC_ADDRESS");
- if (!custom_address) {
- printf("(INTEGRATION_TEST_CLOUDSYNC_ADDRESS not set, skipping) ");
- return SQLITE_OK;
+ if (!custom_address || !*custom_address) {
+ return TEST_SKIPPED;
}
rc = open_load_ext(db_path, &db); RCHECK
@@ -544,7 +1515,7 @@ int test_failure_path (const char *db_path) {
// First invocation — primes the server. Failures may not yet be reported.
rc = db_exec(db, "SELECT cloudsync_network_send_changes();"); RCHECK
- rc = db_exec(db, "SELECT cloudsync_network_check_changes();"); RCHECK
+ rc = db_exec(db, "SELECT cloudsync_network_receive_changes();"); RCHECK
rc = db_exec(db, "SELECT cloudsync_network_sync(250, 1);"); RCHECK
// Give the server time to process and fail the queued apply/check jobs.
@@ -556,7 +1527,7 @@ int test_failure_path (const char *db_path) {
rc = db_expect_gt0(db,
"SELECT cloudsync_network_send_changes() ->> '$.send.lastFailure.jobId';"); RCHECK
rc = db_expect_gt0(db,
- "SELECT cloudsync_network_check_changes() ->> '$.receive.lastFailure.jobId';"); RCHECK
+ "SELECT cloudsync_network_receive_changes() ->> '$.receive.lastFailure.jobId';"); RCHECK
// sync must surface at least one of the two; instr() catches either path.
rc = db_expect_gt0(db,
"SELECT instr(cloudsync_network_sync(250, 1), '\"lastFailure\":');"); RCHECK
@@ -579,8 +1550,9 @@ ABORT_TEST
// MARK: -
int test_report(const char *description, int rc){
- printf("%-24s %s\n", description, rc ? "FAILED" : "OK");
- return rc;
+ const char *result = (rc == TEST_SKIPPED) ? "SKIPPED" : (rc ? "FAILED" : "OK");
+ printf("%-32s %s\n", description, result);
+ return (rc == TEST_SKIPPED) ? 0 : rc; // a skipped test is not a failure
}
#ifdef PEERS
@@ -633,6 +1605,22 @@ int main (void) {
rc += test_report("Is Enabled Test:", test_is_enabled(DB_PATH));
rc += test_report("DB Version Test:", test_db_version(DB_PATH));
rc += test_report("Enable Disable Test:", test_enable_disable(DB_PATH));
+ rc += test_report("Send Gap From Clock Hole Test:", test_send_gap_from_clock_hole(":memory:"));
+
+ // Chunked payload tests run only when INTEGRATION_TEST_CHUNKED_DATABASE_ID points at a
+ // tenant with a small payload_max_chunk_size; state the skip reason once for the group.
+ const char *chunked_id = getenv("INTEGRATION_TEST_CHUNKED_DATABASE_ID");
+ printf("\n-- Chunked Payload Tests%s --\n",
+ (chunked_id && *chunked_id) ? "" : " (INTEGRATION_TEST_CHUNKED_DATABASE_ID not set, skipping)");
+ rc += test_report("Chunked Paths Test:", test_chunked_payload_paths());
+ rc += test_report("Chunked Rowset Test:", test_chunked_payload_rowset_path());
+ rc += test_report("Chunked Single-Sync Drain Test:", test_chunked_payload_single_sync_drain());
+ rc += test_report("Chunked Capped Receive Test:", test_chunked_payload_capped_receive());
+ rc += test_report("Chunked Batched Receive Test:", test_chunked_payload_batched_receive());
+ rc += test_report("Chunked Failure Test:", test_chunked_send_failure_preserves_checkpoint());
+ rc += test_report("Chunked Negative Cache Test:", test_chunked_negative_cache_invalidation());
+ printf("\n");
+
rc += test_report("Offline Error Test:", test_offline_error(":memory:"));
rc += test_report("Double Empty Init Test:", test_double_empty_network_init(":memory:"));
rc += test_report("Failure Path Test:", test_failure_path(":memory:"));
@@ -731,4 +1719,4 @@ int main (void) {
printf("\n");
return rc;
-}
\ No newline at end of file
+}
diff --git a/test/network_unit.c b/test/network_unit.c
new file mode 100644
index 00000000..f4ccb27c
--- /dev/null
+++ b/test/network_unit.c
@@ -0,0 +1,139 @@
+//
+// network_unit.c
+// cloudsync
+//
+// Unit tests for the network layer's pure response-handling logic. Built with
+// networking ENABLED (unlike dist/unit, which is -DCLOUDSYNC_OMIT_NETWORK), so it
+// can call the internal functions directly on crafted in-memory NETWORK_RESULT
+// buffers — no server, no sockets.
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "utils.h"
+#include "network_private.h"
+
+static int failures = 0;
+
+// Prints one result line for the named check: the name left-justified in a
+// 64-column field followed by "OK" or "FAIL". A check that did not pass also
+// increments the file-level failure counter.
+static void check(const char *name, bool ok) {
+    const char *verdict = ok ? "OK" : "FAIL";
+    printf("%-64s %s\n", name, verdict);
+    if (ok) return;
+    failures += 1;
+}
+
+// Wraps a NUL-terminated JSON string in a NETWORK_RESULT tagged
+// CLOUDSYNC_NETWORK_BUFFER. The string is referenced, not copied, so it must
+// outlive the returned value; its length is taken with strlen(). Every field
+// not named below is zero-initialized.
+static NETWORK_RESULT json_buffer(char *json) {
+    NETWORK_RESULT wrapped = {
+        .code = CLOUDSYNC_NETWORK_BUFFER,
+        .buffer = json,
+        .blen = strlen(json),
+    };
+    return wrapped;
+}
+
+// Regression test: the folded lastOptimisticVersion has to follow the most recent
+// valid value reported by the server, even when that value goes DOWN. The server
+// can roll the optimistic version back when a later send chunk fails, and because
+// that value becomes the durable send checkpoint, keeping a running maximum would
+// hide the rollback and make the next send skip the rolled-back changes.
+// All four responses are always fed through the updater; the individual
+// expectations are combined only at the end.
+static bool test_optimistic_version_rollback(void) {
+    int64_t optimistic = -1;
+    int64_t confirmed = -1;
+    int gaps = -1;
+    char *apply = NULL;
+    char *check_fail = NULL;
+
+    // First response seeds both versions.
+    char seed_json[] = "{\"lastOptimisticVersion\":50,\"lastConfirmedVersion\":10}";
+    NETWORK_RESULT seed = json_buffer(seed_json);
+    network_sync_state_update_from_response(&seed, &optimistic, &confirmed, &gaps, &apply, &check_fail);
+    const bool seeded = (optimistic == 50) && (confirmed == 10);
+
+    // Second response moves both versions forward.
+    char advance_json[] = "{\"lastOptimisticVersion\":100,\"lastConfirmedVersion\":20}";
+    NETWORK_RESULT advance = json_buffer(advance_json);
+    network_sync_state_update_from_response(&advance, &optimistic, &confirmed, &gaps, &apply, &check_fail);
+    const bool advanced = (optimistic == 100) && (confirmed == 20);
+
+    // Server rollback after a later chunk error: the value must DECREASE to 50.
+    char rollback_json[] = "{\"lastOptimisticVersion\":50,\"lastConfirmedVersion\":20}";
+    NETWORK_RESULT rollback = json_buffer(rollback_json);
+    network_sync_state_update_from_response(&rollback, &optimistic, &confirmed, &gaps, &apply, &check_fail);
+    const bool rolled_back = (optimistic == 50);
+
+    // A response without the field (parsed -1) must NOT overwrite the held value.
+    char missing_json[] = "{\"lastConfirmedVersion\":20}";
+    NETWORK_RESULT missing = json_buffer(missing_json);
+    network_sync_state_update_from_response(&missing, &optimistic, &confirmed, &gaps, &apply, &check_fail);
+    const bool preserved = (optimistic == 50);
+
+    // None of these responses carries a failures object.
+    const bool no_failures = (apply == NULL) && (check_fail == NULL);
+
+    return seeded && advanced && rolled_back && preserved && no_failures;
+}
+
+// A result whose code is not CLOUDSYNC_NETWORK_BUFFER (here CLOUDSYNC_NETWORK_ERROR,
+// with every other field zeroed) must leave the version and gap accumulators
+// exactly as they were before the call.
+static bool test_non_buffer_is_noop(void) {
+    int64_t optimistic = 7;
+    int64_t confirmed = 3;
+    int gaps = 0;
+    char *apply = NULL;
+    char *check_fail = NULL;
+
+    NETWORK_RESULT failed = { .code = CLOUDSYNC_NETWORK_ERROR };
+    network_sync_state_update_from_response(&failed, &optimistic, &confirmed, &gaps, &apply, &check_fail);
+
+    if (optimistic != 7) return false;
+    if (confirmed != 3) return false;
+    return gaps == 0;
+}
+
+// A single send call is one all-or-nothing batch. Each chunk of the batch carries
+// the same global window [checkpoint+1 .. watermark] together with its batchId,
+// chunkIndex and isFinal flag, so the server confirms the window only after every
+// chunk applied, and a failed batch is re-sent whole under a new id.
+// This test checks that network_apply_json_payload() emits each of those fields
+// for a first "blob" chunk and a final "url" chunk, and that it returns NULL
+// when no batch id is supplied.
+static bool test_apply_json_payload_batch(void) {
+    static const char *const first_expected[] = {
+        "\"blob\":\"QUJD\"",
+        "\"dbVersionMin\":1",
+        "\"dbVersionMax\":10",
+        "\"batchId\":\"batch-uuid-1\"",
+        "\"chunkIndex\":0",
+        "\"isFinal\":false",
+    };
+    // last chunk, url transport: same window, higher index, isFinal true
+    static const char *const last_expected[] = {
+        "\"url\":\"https://s3/part\"",
+        "\"dbVersionMin\":1",
+        "\"dbVersionMax\":10",
+        "\"batchId\":\"batch-uuid-1\"",
+        "\"chunkIndex\":3",
+        "\"isFinal\":true",
+    };
+    bool ok = true;
+
+    char *first = network_apply_json_payload("blob", "QUJD", 1, 10, "batch-uuid-1", 0, false);
+    if (first == NULL) {
+        ok = false;
+    } else {
+        for (size_t i = 0; ok && i < sizeof(first_expected) / sizeof(first_expected[0]); i++) {
+            ok = strstr(first, first_expected[i]) != NULL;
+        }
+        cloudsync_memory_free(first);
+    }
+
+    // The final-chunk payload is always built, even if the first chunk already failed.
+    char *last = network_apply_json_payload("url", "https://s3/part", 1, 10, "batch-uuid-1", 3, true);
+    if (last == NULL) {
+        ok = false;
+    } else {
+        for (size_t i = 0; ok && i < sizeof(last_expected) / sizeof(last_expected[0]); i++) {
+            ok = strstr(last, last_expected[i]) != NULL;
+        }
+        cloudsync_memory_free(last);
+    }
+
+    // a batch id is mandatory for the chunked send path (only probed when all
+    // earlier expectations held)
+    if (ok) {
+        ok = network_apply_json_payload("blob", "QUJD", 1, 10, NULL, 0, false) == NULL;
+    }
+
+    return ok;
+}
+
+// Checks network_compute_status() against one representative argument set per
+// expected status string. Rows are evaluated in order and the test stops at the
+// first mismatch.
+static bool test_compute_status(void) {
+    static const struct {
+        int a1, a2, a3, a4;      // the four arguments, passed through in this order
+        const char *expected;
+    } cases[] = {
+        {100, 100, 0, 100, "synced"},
+        {100,  50, 0, 100, "syncing"},
+        {100, 100, 1, 100, "out-of-sync"},  // gaps
+        { 90,  90, 0, 100, "out-of-sync"},  // behind local
+        { -1, 100, 0, 100, "error"},        // unparsed
+    };
+
+    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+        const char *status = network_compute_status(cases[i].a1, cases[i].a2, cases[i].a3, cases[i].a4);
+        if (strcmp(status, cases[i].expected) != 0) return false;
+    }
+    return true;
+}
+
+// Runs every network unit test in a fixed order, printing one result line per
+// test through check(). Exits with 1 when at least one test failed, 0 otherwise.
+int main(void) {
+    static const struct {
+        const char *label;
+        bool (*run)(void);
+    } suite[] = {
+        {"optimistic/confirmed version folds latest-valid (allows rollback):", test_optimistic_version_rollback},
+        {"non-buffer response is a no-op:", test_non_buffer_is_noop},
+        {"send batch /apply payload (window / batchId / chunkIndex / isFinal):", test_apply_json_payload_batch},
+        {"network_compute_status:", test_compute_status},
+    };
+
+    printf("\nNetwork unit tests\n");
+    for (size_t i = 0; i < sizeof(suite) / sizeof(suite[0]); i++) {
+        check(suite[i].label, suite[i].run());
+    }
+
+    if (failures == 0) {
+        printf("\nAll network unit tests passed\n");
+        return 0;
+    }
+    printf("\n%d test(s) FAILED\n", failures);
+    return 1;
+}
diff --git a/test/postgresql/52_payload_chunks.sql b/test/postgresql/52_payload_chunks.sql
new file mode 100644
index 00000000..ecdd3648
--- /dev/null
+++ b/test/postgresql/52_payload_chunks.sql
@@ -0,0 +1,332 @@
+-- Payload chunks and transparent large-value fragmentation
+
+\set testid '52-chunks'
+\ir helper_test_init.sql
+
+\connect postgres
+\ir helper_psql_conn_setup.sql
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_src;
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_dst;
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_legacy;
+CREATE DATABASE cloudsync_test_52_chunks_src;
+CREATE DATABASE cloudsync_test_52_chunks_dst;
+CREATE DATABASE cloudsync_test_52_chunks_legacy;
+
+\connect cloudsync_test_52_chunks_src
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE payload_chunk_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT '',
+ data BYTEA DEFAULT '\x'::bytea
+);
+SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_src \gset
+SELECT cloudsync_set('payload_max_chunk_size', '1');
+
+INSERT INTO payload_chunk_test(id, note, data)
+SELECT
+ 'big',
+ (SELECT string_agg(md5(i::text), '') FROM generate_series(1, 22500) AS g(i)),
+ decode((SELECT string_agg(md5((i * 17)::text), '') FROM generate_series(1, 23000) AS g(i)), 'hex');
+
+INSERT INTO payload_chunk_test(id, note, data)
+VALUES
+ ('same-a', 'same payload a', decode(repeat('ab', 360000), 'hex')),
+ ('same-b', 'same payload b', decode(repeat('ab', 360000), 'hex'));
+
+INSERT INTO payload_chunk_test(id, note, data)
+SELECT
+ format('row-%s', lpad(i::text, 3, '0')),
+ format('small-%s-%s', i, repeat(md5(i::text), 24)),
+ decode(repeat(md5((i * 31)::text), 16), 'hex')
+FROM generate_series(1, 260) AS g(i);
+
+SELECT
+ count(*) AS chunk_count,
+ count(*) FILTER (WHERE get_byte(payload, 4) = 3) AS v3_chunk_count,
+ bool_and(octet_length(payload) <= 262144) AS chunks_within_limit,
+ max(octet_length(payload)) AS max_chunk_len,
+ sum(rows) AS chunk_rows
+FROM cloudsync_payload_chunks() \gset
+
+\if :chunks_within_limit
+\echo [PASS] (:testid) Generated chunks respect the 256KB technical minimum - max: :max_chunk_len
+\else
+\echo [FAIL] (:testid) Generated chunk exceeds 256KB - max: :max_chunk_len
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT (:chunk_count::int >= 5 AND :v3_chunk_count::int >= 2) AS chunk_shape_ok \gset
+\if :chunk_shape_ok
+\echo [PASS] (:testid) Rowset and large-value fragmentation produced multiple chunks (:chunk_count total, :v3_chunk_count v3)
+\else
+\echo [FAIL] (:testid) Expected multiple chunks and v3 fragments, got :chunk_count total and :v3_chunk_count v3
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT count(*) AS explicit_arg_chunk_count
+FROM cloudsync_payload_chunks(NULL, cloudsync_siteid(), NULL) \gset
+
+SELECT (:explicit_arg_chunk_count::int = :chunk_count::int) AS explicit_args_ok \gset
+\if :explicit_args_ok
+\echo [PASS] (:testid) Optional cloudsync_payload_chunks arguments work
+\else
+\echo [FAIL] (:testid) Optional cloudsync_payload_chunks arguments changed result count
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- exclude_filter_site_id flag: every change here originates from the local
+-- site, so excluding it must yield zero chunks while including it yields the
+-- full set. This proves the predicate flips between "= site" and "<> site".
+SELECT count(*) AS excl_local_chunks
+FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, true) \gset
+
+SELECT count(*) AS incl_local_chunks
+FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, false) \gset
+
+SELECT
+ cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), true, 10000000) IS NULL AS excl_local_blob_is_null,
+ octet_length(cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), false, 10000000)) AS incl_local_blob_size \gset
+
+SELECT (:excl_local_chunks::int = 0 AND :incl_local_chunks::int > 0) AS exclude_flag_ok \gset
+\if :exclude_flag_ok
+\echo [PASS] (:testid) exclude_filter_site_id flips the site filter (exclude local -> 0, include -> :incl_local_chunks)
+\else
+\echo [FAIL] (:testid) exclude_filter_site_id did not flip the filter (exclude=:excl_local_chunks include=:incl_local_chunks)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT (:'excl_local_blob_is_null'::bool AND :incl_local_blob_size::bigint > 0) AS blob_checked_exclude_flag_ok \gset
+\if :blob_checked_exclude_flag_ok
+\echo [PASS] (:testid) cloudsync_payload_blob_checked honors include/exclude site filters
+\else
+\echo [FAIL] (:testid) cloudsync_payload_blob_checked include/exclude mismatch (exclude_null=:excl_local_blob_is_null include=:incl_local_blob_size)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- exclude=true without a filter_site_id must raise an error
+CREATE TEMP TABLE _excl_err(ok bool);
+DO $$
+BEGIN
+ PERFORM 1 FROM cloudsync_payload_chunks(0, NULL, NULL, true);
+ INSERT INTO _excl_err VALUES (false);
+EXCEPTION WHEN OTHERS THEN
+ INSERT INTO _excl_err VALUES (true);
+END $$;
+SELECT ok AS exclude_no_site_errors FROM _excl_err \gset
+DROP TABLE _excl_err;
+\if :exclude_no_site_errors
+\echo [PASS] (:testid) exclude_filter_site_id without a site_id raises an error
+\else
+\echo [FAIL] (:testid) exclude_filter_site_id without a site_id did not error
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+CREATE TEMP TABLE _blob_checked_excl_err(ok bool);
+DO $$
+BEGIN
+ PERFORM cloudsync_payload_blob_checked(0, 0, NULL, true, 10000000);
+ INSERT INTO _blob_checked_excl_err VALUES (false);
+EXCEPTION WHEN OTHERS THEN
+ INSERT INTO _blob_checked_excl_err VALUES (true);
+END $$;
+SELECT ok AS blob_checked_exclude_no_site_errors FROM _blob_checked_excl_err \gset
+DROP TABLE _blob_checked_excl_err;
+\if :blob_checked_exclude_no_site_errors
+\echo [PASS] (:testid) cloudsync_payload_blob_checked exclude without a site_id raises an error
+\else
+\echo [FAIL] (:testid) cloudsync_payload_blob_checked exclude without a site_id did not error
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- UUID text<->blob roundtrip, including the /check string-argument path
+SELECT (
+ cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid())) = cloudsync_siteid()
+ AND cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid(), false)) = cloudsync_siteid()
+ AND cloudsync_uuid_blob(upper(cloudsync_uuid_text(cloudsync_siteid()))) = cloudsync_siteid()
+ AND length(cloudsync_uuid_text(cloudsync_siteid())) = 36
+ AND length(cloudsync_uuid_text(cloudsync_siteid(), false)) = 32
+) AS uuid_conv_ok \gset
+\if :uuid_conv_ok
+\echo [PASS] (:testid) cloudsync_uuid_text/cloudsync_uuid_blob roundtrip (dashed, undashed, uppercase)
+\else
+\echo [FAIL] (:testid) UUID conversion roundtrip mismatch
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- A string-derived site_id (as the /check endpoint would pass it) selects the
+-- same chunks as the binary site_id.
+SELECT count(*) AS str_arg_chunks
+FROM cloudsync_payload_chunks(0, cloudsync_uuid_blob(cloudsync_uuid_text(cloudsync_siteid())), NULL, false) \gset
+SELECT (:str_arg_chunks::int = :incl_local_chunks::int) AS str_arg_ok \gset
+\if :str_arg_ok
+\echo [PASS] (:testid) String-derived site_id via cloudsync_uuid_blob matches the binary site_id
+\else
+\echo [FAIL] (:testid) String-derived site_id mismatch (:str_arg_chunks vs :incl_local_chunks)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT
+ md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS src_hash,
+ count(*) AS src_count
+FROM payload_chunk_test \gset
+
+SELECT string_agg(encode(payload, 'hex'), ',' ORDER BY chunk_index) AS chunks_hex
+FROM cloudsync_payload_chunks() \gset
+
+SELECT
+ encode(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq), 'hex') AS legacy_payload_hex,
+ octet_length(cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq)) AS legacy_payload_len
+FROM cloudsync_changes
+WHERE site_id = cloudsync_siteid() \gset
+
+SELECT
+ octet_length(cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), false, 10000000)) AS checked_payload_len,
+ cloudsync_payload_blob_checked(999999, 0, cloudsync_siteid(), false, 10000000) IS NULL AS checked_empty_is_null \gset
+
+SELECT (:legacy_payload_len::int > 262144) AS legacy_payload_large_ok \gset
+\if :legacy_payload_large_ok
+\echo [PASS] (:testid) Legacy monolithic payload is larger than local chunk setting (:legacy_payload_len bytes)
+\else
+\echo [FAIL] (:testid) Legacy monolithic payload was expected to exceed the chunk setting
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT (:checked_payload_len::bigint = :legacy_payload_len::bigint AND :'checked_empty_is_null'::bool) AS blob_checked_ok \gset
+\if :blob_checked_ok
+\echo [PASS] (:testid) cloudsync_payload_blob_checked returns the legacy payload and empty windows return NULL
+\else
+\echo [FAIL] (:testid) cloudsync_payload_blob_checked mismatch (checked=:checked_payload_len legacy=:legacy_payload_len empty=:checked_empty_is_null)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- This step also guards a backend crash regression: the size-limit ereport()
+-- fires right after the internal cloudsync_changes_select() cursor has run and
+-- unwound. If that SRF ever again returns from inside its PG_TRY (leaving
+-- PG_exception_stack dangling), this ereport longjmps into freed stack and
+-- segfaults instead of raising -- which drops the connection and fails the suite.
+CREATE TEMP TABLE _blob_checked_limit_err(ok bool);
+DO $$
+BEGIN
+ PERFORM cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), false, 1);
+ INSERT INTO _blob_checked_limit_err VALUES (false);
+EXCEPTION WHEN OTHERS THEN
+ INSERT INTO _blob_checked_limit_err VALUES (true);
+END $$;
+SELECT ok AS blob_checked_limit_errors FROM _blob_checked_limit_err \gset
+DROP TABLE _blob_checked_limit_err;
+\if :blob_checked_limit_errors
+\echo [PASS] (:testid) cloudsync_payload_blob_checked raises a limit-exceeded error
+\else
+\echo [FAIL] (:testid) cloudsync_payload_blob_checked did not raise a limit-exceeded error
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\connect cloudsync_test_52_chunks_dst
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE payload_chunk_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT '',
+ data BYTEA DEFAULT '\x'::bytea
+);
+SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_dst \gset
+SELECT cloudsync_set('payload_max_chunk_size', '262144');
+
+CREATE TEMP TABLE chunk_transport(ord INT, payload BYTEA);
+INSERT INTO chunk_transport(ord, payload)
+SELECT ord::int, decode(chunk_hex, 'hex')
+FROM unnest(string_to_array(:'chunks_hex', ',')) WITH ORDINALITY AS t(chunk_hex, ord);
+
+-- Stale-fragment GC: on this fresh connection no fragment has been applied yet,
+-- so the first applied v3 fragment triggers cleanup of the old incomplete group.
+CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments (
+ value_id TEXT NOT NULL, part_index BIGINT NOT NULL, part_count BIGINT NOT NULL,
+ total_size BIGINT NOT NULL, checksum TEXT NOT NULL,
+ created_at BIGINT NOT NULL DEFAULT (EXTRACT(EPOCH FROM now())::bigint),
+ tbl TEXT NOT NULL, pk BYTEA NOT NULL, col_name TEXT NOT NULL, col_version BIGINT NOT NULL,
+ db_version BIGINT NOT NULL, site_id BYTEA NOT NULL, cl BIGINT NOT NULL, seq BIGINT NOT NULL,
+ fragment BYTEA NOT NULL, PRIMARY KEY(value_id, part_index)
+);
+INSERT INTO cloudsync_payload_fragments
+(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment)
+VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x01', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00');
+
+SELECT cloudsync_payload_apply(payload) AS stale_cleanup_apply
+FROM chunk_transport WHERE get_byte(payload, 4) = 3 ORDER BY ord LIMIT 1 \gset
+
+SELECT (COUNT(*) = 0) AS stale_cleanup_ok
+FROM cloudsync_payload_fragments WHERE value_id = 'stale-incomplete' \gset
+\if :stale_cleanup_ok
+\echo [PASS] (:testid) Stale incomplete fragment cleanup works (first apply on a fresh connection)
+\else
+\echo [FAIL] (:testid) Stale incomplete fragment cleanup failed
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT coalesce(sum(cloudsync_payload_apply(payload)), 0) AS chunk_apply_rows
+FROM (SELECT payload FROM chunk_transport ORDER BY ord DESC) AS ordered_chunks \gset
+
+SELECT
+ md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS dst_hash,
+ count(*) AS dst_count
+FROM payload_chunk_test \gset
+
+SELECT (:'dst_hash' = :'src_hash' AND :dst_count::int = :src_count::int) AS chunk_apply_ok \gset
+\if :chunk_apply_ok
+\echo [PASS] (:testid) Chunked payloads apply correctly, including reverse-order v3 fragments and identical large values
+\else
+\echo [FAIL] (:testid) Chunked payload apply mismatch
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- Throttle check: the full apply above already ran the stale GC, so an old
+-- group inserted now must NOT be removed by the next immediate fragment apply.
+-- This proves the GC is not re-scanned on every applied fragment (the O(n^2) fix).
+INSERT INTO cloudsync_payload_fragments
+(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment)
+VALUES ('stale-incomplete-2', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', '\x02', 'data', 1, 1, decode(repeat('00', 16), 'hex'), 1, 1, '\x00');
+
+SELECT cloudsync_payload_apply(payload) AS throttle_apply
+FROM chunk_transport WHERE get_byte(payload, 4) = 3 ORDER BY ord LIMIT 1 \gset
+
+SELECT (COUNT(*) = 1) AS stale_throttle_ok
+FROM cloudsync_payload_fragments WHERE value_id = 'stale-incomplete-2' \gset
+\if :stale_throttle_ok
+\echo [PASS] (:testid) Stale GC is throttled (not re-run on every applied fragment)
+\else
+\echo [FAIL] (:testid) Stale GC was not throttled (removed a fresh-inserted old group on the next apply)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\connect cloudsync_test_52_chunks_legacy
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE payload_chunk_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT '',
+ data BYTEA DEFAULT '\x'::bytea
+);
+SELECT cloudsync_init('payload_chunk_test', 'CLS', 1) AS _init_legacy \gset
+SELECT cloudsync_set('payload_max_chunk_size', '262144');
+SELECT cloudsync_payload_apply(decode(:'legacy_payload_hex', 'hex')) AS legacy_apply_rows \gset
+
+SELECT
+ md5(string_agg(id || ':' || note || ':' || encode(data, 'hex'), '|' ORDER BY id)) AS legacy_hash,
+ count(*) AS legacy_count
+FROM payload_chunk_test \gset
+
+SELECT (:'legacy_hash' = :'src_hash' AND :legacy_count::int = :src_count::int) AS legacy_apply_ok \gset
+\if :legacy_apply_ok
+\echo [PASS] (:testid) Legacy monolithic payload applies even when larger than local chunk setting
+\else
+\echo [FAIL] (:testid) Legacy monolithic payload apply mismatch
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\ir helper_test_cleanup.sql
+\if :should_cleanup
+-- The session is still attached to cloudsync_test_52_chunks_legacy at this point and
+-- PostgreSQL cannot drop the database it is connected to, so switch to postgres first.
+\connect postgres
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_src;
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_dst;
+DROP DATABASE IF EXISTS cloudsync_test_52_chunks_legacy;
+\endif
diff --git a/test/postgresql/53_payload_blob_checked_pg_try.sql b/test/postgresql/53_payload_blob_checked_pg_try.sql
new file mode 100644
index 00000000..d2c64840
--- /dev/null
+++ b/test/postgresql/53_payload_blob_checked_pg_try.sql
@@ -0,0 +1,77 @@
+-- Regression: cloudsync_payload_blob_checked must restore PG_exception_stack on
+-- its SUCCESS path.
+--
+-- The function returns its result with PG_RETURN_BYTEA_P(result) (and
+-- PG_RETURN_NULL on the empty-blob path) from INSIDE its PG_TRY()/PG_END_TRY()
+-- block. PG_RETURN_* expands to `return`, so PG_END_TRY() never runs and
+-- PG_exception_stack is left pointing at this function's now-dead stack frame.
+-- A later ereport(ERROR) raised in the SAME query then siglongjmp()s into that
+-- freed frame (before the enclosing portal's PG_END_TRY can restore the stack)
+-- and the backend segfaults. This is the exact hazard the PR documents and
+-- fixes in cloudsync_changes_select, reintroduced here.
+--
+-- Reproduction: one top-level query that evaluates blob_checked (the divide's
+-- operand forces it to run first) and then raises integer division-by-zero.
+-- BUG PRESENT -> backend crash; psql reports "server closed the connection
+-- unexpectedly" and the run aborts at the SELECT below.
+-- BUG FIXED -> a clean "division by zero" SQL error; the session survives
+-- and the [PASS] line is reached.
+
+\set testid '53-pg-try-stack'
+\ir helper_test_init.sql
+
+\connect postgres
+\ir helper_psql_conn_setup.sql
+DROP DATABASE IF EXISTS cloudsync_test_53_pg_try;
+CREATE DATABASE cloudsync_test_53_pg_try;
+
+\connect cloudsync_test_53_pg_try
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+
+CREATE TABLE blob_checked_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT ''
+);
+SELECT cloudsync_init('blob_checked_test', 'CLS', 1) AS _init \gset
+INSERT INTO blob_checked_test(id, note) VALUES ('a', 'hello'), ('b', 'world');
+
+-- Sanity: blob_checked returns a non-NULL payload, so the call exercises the
+-- success return path that carries the bug. (A standalone call is healed when
+-- the statement completes and the portal's PG_END_TRY restores the stack, so
+-- this line is safe on its own.)
+SELECT cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), false, 1000000000) IS NOT NULL AS have_blob \gset
+\if :have_blob
+\echo [PASS] (:testid) blob_checked produced a payload (exercises the success return path)
+\else
+\echo [FAIL] (:testid) blob_checked returned NULL - cannot exercise the buggy path
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- The post-call error is expected; it must not abort the script so we can check
+-- whether the session survived it.
+\set ON_ERROR_STOP off
+
+-- Single top-level query: blob_checked runs first (operand of the divide),
+-- leaving PG_exception_stack dangling on the buggy build, then int4div raises
+-- ereport(ERROR) while that dangling frame is still the active longjmp target.
+SELECT (cloudsync_payload_blob_checked(0, 0, cloudsync_siteid(), false, 1000000000) IS NOT NULL)::int / 0 AS boom;
+
+\set ON_ERROR_STOP on
+
+-- Reaching here means the backend is still alive: the dangling-frame siglongjmp
+-- did NOT happen, i.e. PG_END_TRY ran on the success path. On the buggy build
+-- the connection is already gone and psql never runs this.
+SELECT 1 AS still_alive \gset
+\if :{?still_alive}
+\echo [PASS] (:testid) session survived a post-call error - PG_exception_stack was restored
+\else
+\echo [FAIL] (:testid) session did not survive a post-call error
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\ir helper_test_cleanup.sql
+\if :should_cleanup
+\connect postgres
+DROP DATABASE IF EXISTS cloudsync_test_53_pg_try;
+\endif
diff --git a/test/postgresql/54_payload_chunks_fragment_state.sql b/test/postgresql/54_payload_chunks_fragment_state.sql
new file mode 100644
index 00000000..d3fd2d06
--- /dev/null
+++ b/test/postgresql/54_payload_chunks_fragment_state.sql
@@ -0,0 +1,105 @@
+-- Reproduction probe for finding #2: cloudsync_payload_chunks() stages
+-- fragment-spanning state (tbl/pk/col_name/col_value/site_id) that is palloc'd
+-- by payload_chunks_fetch_current in the SRF's per-call memory context, then
+-- read again on later SRF_PERCALL invocations when emitting fragments 1..N of a
+-- single oversized value. Per the SRF protocol only multi_call_memory_ctx is
+-- guaranteed to survive between calls, so reading that state on a later call is
+-- a use-after-free in theory.
+--
+-- This test forces a single column value large enough to span MANY fragments
+-- (each ~256KB, the technical minimum), gives each fragment region a distinct
+-- byte pattern so any cross-fragment corruption is visible, round-trips the
+-- chunks through apply on a fresh database, and compares the recovered value to
+-- the source byte-for-byte. If fragments 1..N read freed/clobbered memory the
+-- reassembly checksum fails ("inconsistent v3 fragments") or the recovered value
+-- differs -> [FAIL]. If the per-call state survives (current FROM-clause path)
+-- it round-trips cleanly -> [PASS].
+
+\set testid '54-frag-state'
+\ir helper_test_init.sql
+
+\connect postgres
+\ir helper_psql_conn_setup.sql
+DROP DATABASE IF EXISTS cloudsync_test_54_frag_src;
+DROP DATABASE IF EXISTS cloudsync_test_54_frag_dst;
+CREATE DATABASE cloudsync_test_54_frag_src;
+CREATE DATABASE cloudsync_test_54_frag_dst;
+
+\connect cloudsync_test_54_frag_src
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE frag_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT ''
+);
+SELECT cloudsync_init('frag_test', 'CLS', 1) AS _init_src \gset
+SELECT cloudsync_set('payload_max_chunk_size', '1'); -- clamps to 256KB minimum
+
+-- One oversized value (~1.18 MB) whose fragments straddle distinct regions:
+-- A...A | B...B | C...C | D...D | E...E. Cross-fragment corruption (reading the
+-- wrong region after a per-call context reset) would change the recovered text.
+INSERT INTO frag_test(id, note)
+VALUES ('big', repeat('A', 262144) || repeat('B', 262144) || repeat('C', 262144)
+ || repeat('D', 262144) || repeat('E', 131072));
+
+-- Confirm this single value actually fragments into several v3 chunks.
+-- get_byte(payload, 4) = 3 is the marker used here to recognise a v3 (fragment) chunk.
+-- The aggregate query always returns exactly one row, so gset always defines both variables.
+SELECT
+    count(*) FILTER (WHERE get_byte(payload, 4) = 3) AS v3_chunks,
+    count(*) AS total_chunks
+FROM cloudsync_payload_chunks() \gset
+SELECT (:v3_chunks::int >= 3) AS multi_fragment_ok \gset
+\if :multi_fragment_ok
+\echo [PASS] (:testid) Oversized value fragmented into :v3_chunks v3 fragments (:total_chunks chunks total)
+\else
+\echo [FAIL] (:testid) Expected >=3 v3 fragments, got :v3_chunks (test cannot exercise multi-fragment state)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+SELECT
+ md5(note) AS src_md5,
+ length(note) AS src_len
+FROM frag_test WHERE id = 'big' \gset
+
+-- Capture every chunk for transport, in generation order.
+SELECT string_agg(encode(payload, 'hex'), ',' ORDER BY chunk_index) AS chunks_hex
+FROM cloudsync_payload_chunks() \gset
+
+\connect cloudsync_test_54_frag_dst
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE frag_test (
+ id TEXT PRIMARY KEY,
+ note TEXT DEFAULT ''
+);
+SELECT cloudsync_init('frag_test', 'CLS', 1) AS _init_dst \gset
+SELECT cloudsync_set('payload_max_chunk_size', '262144');
+
+CREATE TEMP TABLE chunk_transport(ord INT, payload BYTEA);
+INSERT INTO chunk_transport(ord, payload)
+SELECT ord::int, decode(chunk_hex, 'hex')
+FROM unnest(string_to_array(:'chunks_hex', ',')) WITH ORDINALITY AS t(chunk_hex, ord);
+
+SELECT coalesce(sum(cloudsync_payload_apply(payload)), 0) AS applied_rows
+FROM (SELECT payload FROM chunk_transport ORDER BY ord) AS ordered \gset
+
+SELECT
+ md5(note) AS dst_md5,
+ length(note) AS dst_len
+FROM frag_test WHERE id = 'big' \gset
+
+SELECT (:'dst_md5' = :'src_md5' AND :dst_len::bigint = :src_len::bigint) AS frag_roundtrip_ok \gset
+\if :frag_roundtrip_ok
+\echo [PASS] (:testid) Multi-fragment oversized value round-trips byte-exact (:src_len bytes)
+\else
+\echo [FAIL] (:testid) Multi-fragment value corrupted across SRF calls (src_len=:src_len dst_len=:dst_len src_md5=:src_md5 dst_md5=:dst_md5)
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\ir helper_test_cleanup.sql
+\if :should_cleanup
+\connect postgres
+DROP DATABASE IF EXISTS cloudsync_test_54_frag_src;
+DROP DATABASE IF EXISTS cloudsync_test_54_frag_dst;
+\endif
diff --git a/test/postgresql/55_payload_chunks_positional_resume.sql b/test/postgresql/55_payload_chunks_positional_resume.sql
new file mode 100644
index 00000000..c1c1f95f
--- /dev/null
+++ b/test/postgresql/55_payload_chunks_positional_resume.sql
@@ -0,0 +1,164 @@
+-- Payload chunks positional-cursor resume
+--
+-- Proves the positional cursor on cloudsync_payload_chunks tiles a window exactly:
+-- resuming at any chunk's (next_db_version, next_seq, next_frag_offset) reproduces
+-- the following chunk byte-for-byte, including boundaries that fall inside a single
+-- committed db_version and inside a value larger than the chunk budget. No spool
+-- table, no idempotent overlap.
+--
+-- Part 2 is end-to-end: drain the whole window the way the /check job will (one
+-- chunk per call via the positional cursor), apply that stream to a fresh database,
+-- and assert the receiver's table content hashes identically to the source.
+
+\set testid '55-positional'
+\ir helper_test_init.sql
+
+\connect postgres
+\ir helper_psql_conn_setup.sql
+DROP DATABASE IF EXISTS cloudsync_test_55_positional;
+DROP DATABASE IF EXISTS cloudsync_test_55_positional_dst;
+CREATE DATABASE cloudsync_test_55_positional;
+CREATE DATABASE cloudsync_test_55_positional_dst;
+
+\connect cloudsync_test_55_positional
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE split_test (id TEXT PRIMARY KEY, body BYTEA DEFAULT '\x'::bytea);
+SELECT cloudsync_init('split_test', 'CLS', 1) AS _init \gset
+SELECT cloudsync_set('payload_max_chunk_size', '262144');
+
+-- tx1: many medium incompressible rows in one statement -> a single db_version
+-- split across several chunks (row-boundary resumes, incl. resumes landing INSIDE
+-- one committed version that the legacy since>db_version cursor could not express).
+INSERT INTO split_test(id, body)
+SELECT format('row-%s', lpad(i::text, 4, '0')),
+ decode((SELECT string_agg(md5((i * 1000 + j)::text), '') FROM generate_series(1, 88) AS s(j)), 'hex')
+FROM generate_series(1, 500) AS g(i);
+
+-- tx2: one value larger than the chunk budget -> v3 fragments (mid-fragment resumes).
+INSERT INTO split_test(id, body)
+VALUES ('big', decode((SELECT string_agg(md5(j::text), '') FROM generate_series(1, 30000) AS s(j)), 'hex'));
+
+-- For each non-final chunk of the full-window scan, resume at its reported cursor
+-- and fetch the first chunk; it must equal the next chunk of the full scan. The
+-- correlated SRF subquery uses ORDER BY ... LIMIT 1 so each resume call drains
+-- fully (no early-terminated value-per-call SRF).
+WITH base AS (
+ SELECT chunk_index, payload, next_db_version, next_seq, next_frag_offset, is_final, watermark_db_version
+ FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, false)
+),
+resumed AS (
+ SELECT b.chunk_index,
+ (b.next_frag_offset > 0) AS is_frag_boundary,
+ (SELECT r.payload
+ FROM cloudsync_payload_chunks(NULL, cloudsync_siteid(),
+ (SELECT max(watermark_db_version) FROM base), false,
+ b.next_db_version, b.next_seq, b.next_frag_offset) r
+ ORDER BY r.chunk_index LIMIT 1) AS next_payload
+ FROM base b
+ WHERE NOT b.is_final
+)
+SELECT
+ (SELECT count(*) FROM base) AS base_count,
+ coalesce((SELECT bool_and(r.next_payload = b2.payload)
+ FROM resumed r JOIN base b2 ON b2.chunk_index = r.chunk_index + 1), false) AS chunks_identical,
+ coalesce((SELECT bool_or(is_frag_boundary) FROM resumed), false) AS saw_frag
+\gset
+
+SELECT (:base_count::int >= 4) AS enough_chunks \gset
+\if :enough_chunks
+\echo [PASS] (:testid) window produced multiple chunks (:base_count)
+\else
+\echo [FAIL] (:testid) expected a multi-chunk window, got :base_count
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\if :chunks_identical
+\echo [PASS] (:testid) positional resume reproduced every following chunk byte-for-byte
+\else
+\echo [FAIL] (:testid) a positional resume did not reproduce the next chunk
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\if :saw_frag
+\echo [PASS] (:testid) mid-fragment resume exercised
+\else
+\echo [FAIL] (:testid) mid-fragment resume not exercised
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+-- Part 2: end-to-end drain + apply round-trip.
+--
+-- Drain the window exactly as the /check job will: start with the legacy
+-- exclusive cursor (since=0), then step the positional cursor one chunk per call,
+-- collecting payloads in drain order. ORDER BY chunk_index LIMIT 1 forces each
+-- value-per-call SRF to run to completion (no early-terminated cursor). The drained
+-- chunks are returned hex-joined so they can cross \connect into the receiver DB.
+CREATE OR REPLACE FUNCTION _positional_drain_hex() RETURNS text LANGUAGE plpgsql AS $$
+DECLARE
+    v_chunk record; v_hex_parts text[] := '{}'; v_iterations int := 0;
+    v_watermark bigint := 0;  -- stays 0 until the first chunk reports watermark_db_version
+    v_next_dbv bigint; v_next_seq bigint; v_next_frag bigint;  -- positional resume cursor
+BEGIN
+    LOOP
+        v_iterations := v_iterations + 1;  -- runaway guard: abort instead of looping forever
+        IF v_iterations > 100000 THEN RAISE EXCEPTION 'positional drain did not terminate'; END IF;
+        IF v_watermark = 0 THEN  -- first call: legacy exclusive cursor (since = 0)
+            SELECT * INTO v_chunk FROM cloudsync_payload_chunks(0, cloudsync_siteid(), NULL, false)
+                ORDER BY chunk_index LIMIT 1;
+        ELSE  -- later calls: resume at the cursor reported by the previous chunk
+            SELECT * INTO v_chunk FROM cloudsync_payload_chunks(NULL, cloudsync_siteid(), v_watermark, false, v_next_dbv, v_next_seq, v_next_frag)
+                ORDER BY chunk_index LIMIT 1;
+        END IF;
+        EXIT WHEN NOT FOUND;  -- FOUND reflects whichever SELECT INTO just ran
+        IF v_watermark = 0 THEN v_watermark := v_chunk.watermark_db_version; END IF;
+        v_hex_parts := array_append(v_hex_parts, encode(v_chunk.payload, 'hex'));
+        v_next_dbv := v_chunk.next_db_version; v_next_seq := v_chunk.next_seq; v_next_frag := v_chunk.next_frag_offset;
+        EXIT WHEN v_chunk.is_final;
+    END LOOP;
+    RETURN array_to_string(v_hex_parts, ',');  -- comma-joined hex, in drain order
+END $$;
+
+SELECT _positional_drain_hex() AS chunks_hex \gset
+SELECT
+ md5(string_agg(id || ':' || encode(body, 'hex'), '|' ORDER BY id)) AS src_hash,
+ count(*) AS src_count
+FROM split_test \gset
+
+\connect cloudsync_test_55_positional_dst
+\ir helper_psql_conn_setup.sql
+CREATE EXTENSION IF NOT EXISTS cloudsync;
+CREATE TABLE split_test (id TEXT PRIMARY KEY, body BYTEA DEFAULT '\x'::bytea);
+SELECT cloudsync_init('split_test', 'CLS', 1) AS _init_dst \gset
+SELECT cloudsync_set('payload_max_chunk_size', '262144');
+
+-- Reconstitute the drained chunks and apply them (reverse order on purpose: apply
+-- must be order-independent and reassemble fragments regardless).
+CREATE TEMP TABLE chunk_transport(ord int, payload bytea);
+INSERT INTO chunk_transport(ord, payload)
+SELECT ord::int, decode(chunk_hex, 'hex')
+FROM unnest(string_to_array(:'chunks_hex', ',')) WITH ORDINALITY AS t(chunk_hex, ord);
+
+SELECT coalesce(sum(cloudsync_payload_apply(payload)), 0) AS applied_rows
+FROM (SELECT payload FROM chunk_transport ORDER BY ord DESC) AS ordered \gset
+
+SELECT
+ md5(string_agg(id || ':' || encode(body, 'hex'), '|' ORDER BY id)) AS dst_hash,
+ count(*) AS dst_count
+FROM split_test \gset
+
+SELECT (:'dst_hash' = :'src_hash' AND :dst_count::int = :src_count::int
+ AND :dst_count::int > 0) AS roundtrip_ok \gset
+\if :roundtrip_ok
+\echo [PASS] (:testid) positional drain applied to a fresh database reproduces the source (:dst_count rows)
+\else
+\echo [FAIL] (:testid) drain/apply mismatch (src_count=:src_count dst_count=:dst_count hashes :'src_hash' vs :'dst_hash')
+SELECT (:fail::int + 1) AS fail \gset
+\endif
+
+\ir helper_test_cleanup.sql
+\if :should_cleanup
+\connect postgres
+DROP DATABASE IF EXISTS cloudsync_test_55_positional;
+DROP DATABASE IF EXISTS cloudsync_test_55_positional_dst;
+\endif
diff --git a/test/postgresql/full_test.sql b/test/postgresql/full_test.sql
index 9ff000a6..c6f38b71 100644
--- a/test/postgresql/full_test.sql
+++ b/test/postgresql/full_test.sql
@@ -59,6 +59,10 @@
\ir 49_row_filter_prefill.sql
\ir 50_block_lww_existing_data.sql
\ir 51_stale_table_settings_dropped_meta.sql
+\ir 52_payload_chunks.sql
+\ir 53_payload_blob_checked_pg_try.sql
+\ir 54_payload_chunks_fragment_state.sql
+\ir 55_payload_chunks_positional_resume.sql
-- 'Test summary'
\echo '\nTest summary:'
diff --git a/test/sync_bench.c b/test/sync_bench.c
index 946fffa9..5fd496aa 100644
--- a/test/sync_bench.c
+++ b/test/sync_bench.c
@@ -134,6 +134,21 @@ static int timed_query_text(sqlite3 *db, const char *sql, char **out, double *st
return rc;
}
+// Executes one cloudsync_network_* scalar call. When built with CLOUDSYNC_NETWORK_TRACE
+// the returned JSON is fetched and traced; in normal builds the result is discarded.
+static int db_exec_network(sqlite3 *db, const char *operation, const char *sql) {
+#ifndef CLOUDSYNC_NETWORK_TRACE
+    (void)operation;
+    return db_exec(db, sql);
+#else
+    char *json = NULL;
+    const int status = query_text(db, sql, &json);
+    bench_trace("response op=%s json=%s", operation, json ? json : "(null)");
+    free(json);
+    return status;
+#endif
+}
+
static int open_load_ext(const char *db_path, sqlite3 **out_db) {
bench_trace("step=open-load-extension db_path=%s begin", db_path);
sqlite3 *db = NULL;
@@ -205,8 +220,10 @@ static int init_network(sqlite3 *db, const char *label, const char *database_id,
if (rc != SQLITE_OK) return rc;
}
+ // Drain any pre-existing backlog so the measured send->apply later starts from a
+ // clean baseline. This is untimed warm-up, not part of the latency measurement.
bench_trace("step=pre-measure-sync db=%s begin sql=cloudsync_network_sync(500,4)", label);
- rc = db_exec(db, "SELECT cloudsync_network_sync(500, 4);");
+ rc = db_exec_network(db, "pre-measure-sync", "SELECT cloudsync_network_sync(500, 4);");
bench_trace("step=pre-measure-sync db=%s end rc=%d", label, rc);
return rc;
}
@@ -383,6 +400,9 @@ static int timed_request(sqlite3 *db, sync_bench_request *request, const char *o
if (strcmp(operation, "check") == 0 && request->result_json) {
request->rows_received = json_int_at_path(db, request->result_json, "$.receive.rows", -1);
}
+ // Surface the raw JSON returned by the network function (send/check) under trace.
+ bench_trace("response op=%s attempt=%d json=%s", operation, attempt,
+ request->result_json ? request->result_json : "(null)");
return request->sqlite_rc;
}
@@ -522,6 +542,8 @@ int main(void) {
remove(DB_B_PATH);
cloudsync_memory_init(1);
+ // Step 1 - setup: open both local databases (sender A, receiver B), load the
+ // extension, create the schema, attach to the network, and drain to a clean baseline.
bench_trace("step=benchmark-setup begin database_id=%s poll_delay_ms=%d max_polls=%d", database_id, poll_delay_ms, max_polls);
rc = setup_database("db_a", DB_A_PATH, database_id, address, apikey, &db_a);
if (rc != SQLITE_OK) goto cleanup;
@@ -529,15 +551,19 @@ int main(void) {
if (rc != SQLITE_OK) goto cleanup;
bench_trace("step=benchmark-setup end rc=%d", rc);
+ // Prune stale rows from prior runs and push those deletions so they don't pollute
+ // the receiver's backlog during the measured round-trip.
rc = cleanup_old_benchmark_rows(db_a, cleanup_older_than_seconds, &cleanup_deleted_rows);
if (rc != SQLITE_OK) goto cleanup;
if (cleanup_deleted_rows > 0) {
bench_trace("step=cleanup-send db=db_a deleted=%d begin sql=cloudsync_network_send_changes", cleanup_deleted_rows);
- rc = db_exec(db_a, "SELECT cloudsync_network_send_changes();");
+ rc = db_exec_network(db_a, "cleanup-send", "SELECT cloudsync_network_send_changes();");
bench_trace("step=cleanup-send db=db_a deleted=%d end rc=%d", cleanup_deleted_rows, rc);
if (rc != SQLITE_OK) goto cleanup;
}
+ // Step 3 - build the unique benchmark row: a UUIDv7 id plus an incompressible
+ // random blob, so the payload survives compression and exercises a realistic size.
cloudsync_uuid_v7_string(row_id, true);
snprintf(marker, sizeof(marker), "sync-bench-%s", row_id);
snprintf(payload, sizeof(payload), "payload-%s", row_id);
@@ -553,9 +579,12 @@ int main(void) {
random_blob = &empty_blob;
}
+ // Insert the row on the sender. This is the change whose propagation we time.
rc = insert_benchmark_row(db_a, row_id, payload, marker, random_blob, random_blob_size);
if (rc != SQLITE_OK) goto cleanup;
+ // Step 4 - precondition: the row must not already exist on the receiver, or the
+ // measurement would be meaningless.
bench_trace("step=verify-before-send db=db_b row_id=%s begin", row_id);
rc = verify_row(db_b, row_id, payload, marker, random_blob, random_blob_size, &applied);
bench_trace("step=verify-before-send db=db_b row_id=%s end rc=%d applied=%s", row_id, rc, applied ? "true" : "false");
@@ -566,6 +595,8 @@ int main(void) {
goto cleanup;
}
+ // Step 5 - measured send: push the row from A and capture the send summary
+ // (status / localVersion / serverVersion). total_start_ms anchors the latency clock.
bench_trace("step=send db=db_a row_id=%s begin sql=cloudsync_network_send_changes", row_id);
rc = timed_request(db_a, &requests[request_count++], "send", 1, "SELECT cloudsync_network_send_changes();");
bench_trace("step=send db=db_a row_id=%s end rc=%d elapsed_ms=%.2f", row_id, rc, requests[request_count - 1].elapsed_ms);
@@ -575,6 +606,8 @@ int main(void) {
send_summary.server_version = json_int_at_path(db_a, requests[0].result_json, "$.send.serverVersion", -1);
double total_start_ms = requests[0].started_ms;
+ // Step 6 - poll loop: receive on B (sleep between attempts) until the row is
+ // applied and verified, or max_polls is exhausted.
for (int i = 0; i < max_polls; i++) {
if (i > 0 && poll_delay_ms > 0) {
bench_trace("step=poll-sleep attempt=%d delay_ms=%d begin", i + 1, poll_delay_ms);
@@ -584,8 +617,8 @@ int main(void) {
poll_sleep_ms += sleep_elapsed_ms;
bench_trace("step=poll-sleep attempt=%d end elapsed_ms=%.2f", i + 1, sleep_elapsed_ms);
}
- bench_trace("step=check db=db_b attempt=%d row_id=%s begin sql=cloudsync_network_check_changes", i + 1, row_id);
- rc = timed_request(db_b, &requests[request_count++], "check", i + 1, "SELECT cloudsync_network_check_changes();");
+ bench_trace("step=check db=db_b attempt=%d row_id=%s begin sql=cloudsync_network_receive_changes", i + 1, row_id);
+ rc = timed_request(db_b, &requests[request_count++], "check", i + 1, "SELECT cloudsync_network_receive_changes();");
polls = i + 1;
bench_trace("step=check db=db_b attempt=%d row_id=%s end rc=%d rows=%d elapsed_ms=%.2f", i + 1, row_id, rc, requests[request_count - 1].rows_received, requests[request_count - 1].elapsed_ms);
if (rc != SQLITE_OK) goto cleanup;
@@ -609,6 +642,8 @@ int main(void) {
rc = SQLITE_BUSY;
}
+ // Step 7 - aggregate timings: split the end-to-end latency into time spent in
+ // network requests, in poll sleeps, and the remaining local overhead.
for (int i = 0; i < request_count; i++) request_ms += requests[i].elapsed_ms;
measured_overhead_ms = total_ms - request_ms - poll_sleep_ms;
if (measured_overhead_ms < 0.0 && measured_overhead_ms > -0.01) measured_overhead_ms = 0.0;
diff --git a/test/unit.c b/test/unit.c
index 05e9c958..38597888 100644
--- a/test/unit.c
+++ b/test/unit.c
@@ -1901,6 +1901,11 @@ bool do_test_dbutils (void) {
rc = dbutils_settings_get_value(data, "key2", buffer, &blen, NULL);
if (rc != SQLITE_OK) goto finalize;
if (buffer[0] != 0) goto finalize;
+
+ dbutils_settings_set_key_value(data, "decimal_key", "010");
+ int64_t intvalue = 0;
+ rc = dbutils_settings_get_value(data, "decimal_key", NULL, NULL, &intvalue);
+ if (rc != SQLITE_OK || intvalue != 10) goto finalize;
// test table settings
rc = dbutils_table_settings_set_key_value(data, NULL, NULL, NULL, NULL);
@@ -11941,6 +11946,994 @@ bool do_test_corrupted_payload (int nclients, bool print_result, bool cleanup_da
return result;
}
+typedef struct {
+ void *data; // malloc'd private copy of one payload blob; released by test_payload_chunks_free()
+ int len; // number of bytes stored in data
+} test_payload_chunk;
+
+static void test_payload_chunks_free(test_payload_chunk *chunks, int count) {
+    if (chunks == NULL) return;
+    // free(NULL) is a no-op, so slots whose data was never allocated are fine.
+    for (int idx = count - 1; idx >= 0; --idx)
+        free(chunks[idx].data);
+    free(chunks);
+}
+
+// Compares payload_chunk_test between two databases, walking both in id order.
+// Every row must agree on the storage class, byte length and raw bytes of
+// id, note and data; a differing row count is reported as a mismatch too.
+static bool test_payload_chunks_tables_equal(sqlite3 *src, sqlite3 *dst) {
+    static const char query[] = "SELECT id, note, data FROM payload_chunk_test ORDER BY id;";
+    sqlite3_stmt *lhs = NULL;
+    sqlite3_stmt *rhs = NULL;
+    bool identical = false;
+
+    const int prep_l = sqlite3_prepare_v2(src, query, -1, &lhs, NULL);
+    const int prep_r = sqlite3_prepare_v2(dst, query, -1, &rhs, NULL);
+    if (prep_l != SQLITE_OK || prep_r != SQLITE_OK) goto finalize;
+
+    for (;;) {
+        const int step_l = sqlite3_step(lhs);
+        const int step_r = sqlite3_step(rhs);
+        if (step_l != step_r) goto finalize;    // one side ran out of rows (or failed) first
+        if (step_l == SQLITE_DONE) { identical = true; break; }
+        if (step_l != SQLITE_ROW) goto finalize;
+
+        for (int col = 0; col < 3; ++col) {
+            const int type_l = sqlite3_column_type(lhs, col), type_r = sqlite3_column_type(rhs, col);
+            const int len_l = sqlite3_column_bytes(lhs, col), len_r = sqlite3_column_bytes(rhs, col);
+            if (type_l != type_r || len_l != len_r) goto finalize;
+            const void *val_l = (type_l == SQLITE_BLOB) ? sqlite3_column_blob(lhs, col) : sqlite3_column_text(lhs, col);
+            const void *val_r = (type_r == SQLITE_BLOB) ? sqlite3_column_blob(rhs, col) : sqlite3_column_text(rhs, col);
+            if (len_l <= 0) continue;           // nothing to compare for empty/NULL values
+            if (!val_l || !val_r || memcmp(val_l, val_r, len_l) != 0) goto finalize;
+        }
+    }
+
+finalize:
+    sqlite3_finalize(lhs);
+    sqlite3_finalize(rhs);
+    return identical;
+}
+
+bool do_test_payload_chunks_large_values (bool print_result, bool cleanup_databases) {
+ sqlite3 *db[3] = {NULL, NULL, NULL}; // [0] source, [1] chunked-apply target, [2] legacy-apply target
+ sqlite3_stmt *stmt = NULL;
+ sqlite3_stmt *apply = NULL;
+ test_payload_chunk *chunks = NULL;
+ int chunk_count = 0;
+ int chunk_cap = 0;
+ int v3_count = 0;
+ int first_v3_chunk = -1;
+ int legacy_payload_len = 0;
+ bool result = false;
+ int rc = SQLITE_OK;
+ const int max_chunk_size = CLOUDSYNC_PAYLOAD_CHUNK_MIN_SIZE; // upper bound asserted for every emitted chunk
+ // Outline: chunk db[0]'s changes, apply them to db[1] in reverse, check fragment GC + throttle, then the legacy path on db[2].
+ time_t timestamp = time(NULL);
+ int saved_counter = test_counter++;
+ // Create three databases with the same table and cloudsync enabled on it.
+ for (int i = 0; i < 3; ++i) {
+ db[i] = do_create_database_file(i, timestamp, saved_counter);
+ if (!db[i]) goto finalize;
+
+ rc = sqlite3_exec(db[i],
+ "CREATE TABLE payload_chunk_test ("
+ "id TEXT PRIMARY KEY, "
+ "note TEXT DEFAULT '', "
+ "data BLOB DEFAULT x'');"
+ "SELECT cloudsync_init('payload_chunk_test');",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ }
+ // Seed db[0]: one row with large random text and blob, two rows with 720000-byte zero blobs, and 260 small rows.
+ rc = sqlite3_exec(db[0],
+ "SELECT cloudsync_set('payload_max_chunk_size', '262144');"
+ "INSERT INTO payload_chunk_test(id, note, data) "
+ "VALUES ('big', lower(hex(randomblob(360000))), randomblob(720000));"
+ "INSERT INTO payload_chunk_test(id, note, data) "
+ "VALUES ('same-a', 'same payload a', zeroblob(720000));"
+ "INSERT INTO payload_chunk_test(id, note, data) "
+ "VALUES ('same-b', 'same payload b', zeroblob(720000));"
+ "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < 260) "
+ "INSERT INTO payload_chunk_test(id, note, data) "
+ "SELECT printf('row-%03d', i), printf('small-%03d-%s', i, hex(randomblob(850))), randomblob(512) FROM c;",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ // Drain cloudsync_payload_chunks() in chunk_index order, validating and copying every chunk.
+ rc = sqlite3_prepare_v2(db[0],
+ "SELECT payload, payload_size, rows FROM cloudsync_payload_chunks() ORDER BY chunk_index;",
+ -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+
+ while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
+ int len = sqlite3_column_bytes(stmt, 0);
+ const void *payload = sqlite3_column_blob(stmt, 0);
+ sqlite3_int64 payload_size = sqlite3_column_int64(stmt, 1);
+ sqlite3_int64 rows = sqlite3_column_int64(stmt, 2);
+ if (!payload || len <= 0 || payload_size != len || len > max_chunk_size || rows <= 0) goto finalize;
+ if (len > 4 && ((const unsigned char *)payload)[4] == 3) { // NOTE(review): byte 4 is presumably the payload format version - confirm
+ if (first_v3_chunk < 0) first_v3_chunk = chunk_count;
+ ++v3_count;
+ }
+ // Grow the chunk array by doubling (16 slots initially); newly added slots are zero-filled.
+ if (chunk_count == chunk_cap) {
+ int new_cap = chunk_cap ? chunk_cap * 2 : 16;
+ test_payload_chunk *new_chunks = realloc(chunks, sizeof(*chunks) * new_cap);
+ if (!new_chunks) goto finalize;
+ memset(new_chunks + chunk_cap, 0, sizeof(*chunks) * (new_cap - chunk_cap));
+ chunks = new_chunks;
+ chunk_cap = new_cap;
+ }
+ // Keep a private copy so the chunk can be applied after this statement has been finalized.
+ chunks[chunk_count].data = malloc(len);
+ if (!chunks[chunk_count].data) goto finalize;
+ memcpy(chunks[chunk_count].data, payload, len);
+ chunks[chunk_count].len = len;
+ ++chunk_count;
+ }
+ if (rc != SQLITE_DONE) goto finalize;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+ // The seed data must produce at least 5 chunks, 2 of them v3, or the checks below prove nothing.
+ if (chunk_count < 5 || v3_count < 2) goto finalize;
+
+ rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+
+ // Apply in reverse order to verify that v3 value fragments can be staged
+ // and completed independently from transport ordering.
+ for (int i = chunk_count - 1; i >= 0; --i) {
+ rc = sqlite3_bind_blob(apply, 1, chunks[i].data, chunks[i].len, SQLITE_STATIC);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(apply);
+ if (rc != SQLITE_ROW) goto finalize;
+ sqlite3_reset(apply);
+ sqlite3_clear_bindings(apply);
+ }
+ sqlite3_finalize(apply);
+ apply = NULL;
+ // After the reverse-order apply, db[1] must hold exactly the rows of db[0].
+ if (!test_payload_chunks_tables_equal(db[0], db[1])) goto finalize;
+
+ if (first_v3_chunk < 0) goto finalize;
+
+ // Reopen db[1] so the v3-fragment stale-GC throttle starts fresh on this
+ // connection: the first fragment applied below runs the stale cleanup.
+ close_db(db[1]);
+ db[1] = do_create_database_file(1, timestamp, saved_counter);
+ if (!db[1]) goto finalize;
+ // Plant an incomplete fragment group (part 0 of 2) with created_at=0, creating the staging table if absent.
+ rc = sqlite3_exec(db[1],
+ "CREATE TABLE IF NOT EXISTS cloudsync_payload_fragments ("
+ "value_id TEXT NOT NULL, part_index INTEGER NOT NULL, part_count INTEGER NOT NULL, total_size INTEGER NOT NULL, "
+ "checksum TEXT NOT NULL, created_at INTEGER NOT NULL DEFAULT (unixepoch()), "
+ "tbl TEXT NOT NULL, pk BLOB NOT NULL, col_name TEXT NOT NULL, col_version INTEGER NOT NULL, db_version INTEGER NOT NULL, "
+ "site_id BLOB NOT NULL, cl INTEGER NOT NULL, seq INTEGER NOT NULL, fragment BLOB NOT NULL, "
+ "PRIMARY KEY(value_id, part_index)) WITHOUT ROWID;"
+ "INSERT OR REPLACE INTO cloudsync_payload_fragments "
+ "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) "
+ "VALUES ('stale-incomplete', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', x'01', 'data', 1, 1, zeroblob(16), 1, 1, x'00');",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+
+ // First fragment apply on the fresh connection -> stale GC runs -> removed.
+ rc = sqlite3_prepare_v2(db[1], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_bind_blob(apply, 1, chunks[first_v3_chunk].data, chunks[first_v3_chunk].len, SQLITE_STATIC);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(apply);
+ if (rc != SQLITE_ROW) goto finalize;
+ sqlite3_reset(apply);
+ sqlite3_clear_bindings(apply);
+ // The planted 'stale-incomplete' group must no longer be present.
+ rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM cloudsync_payload_fragments WHERE value_id='stale-incomplete';", -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(stmt);
+ if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 0) goto finalize;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+
+ // Throttle check: an old group inserted now must NOT be removed by the very
+ // next fragment apply, because the GC just ran (within the throttle window).
+ // This proves the cleanup is not re-scanned on every applied fragment.
+ rc = sqlite3_exec(db[1],
+ "INSERT OR REPLACE INTO cloudsync_payload_fragments "
+ "(value_id, part_index, part_count, total_size, checksum, created_at, tbl, pk, col_name, col_version, db_version, site_id, cl, seq, fragment) "
+ "VALUES ('stale-incomplete-2', 0, 2, 10, '0000000000000000', 0, 'payload_chunk_test', x'02', 'data', 1, 1, zeroblob(16), 1, 1, x'00');",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_bind_blob(apply, 1, chunks[first_v3_chunk].data, chunks[first_v3_chunk].len, SQLITE_STATIC);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(apply);
+ if (rc != SQLITE_ROW) goto finalize;
+ sqlite3_finalize(apply);
+ apply = NULL;
+
+ rc = sqlite3_prepare_v2(db[1], "SELECT COUNT(*) FROM cloudsync_payload_fragments WHERE value_id='stale-incomplete-2';", -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(stmt);
+ if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize; // throttled -> still present
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+
+ // The legacy aggregate API must still produce an applyable monolithic payload.
+ rc = sqlite3_prepare_v2(db[0],
+ "SELECT cloudsync_payload_encode(tbl, pk, col_name, col_value, col_version, db_version, site_id, cl, seq) "
+ "FROM cloudsync_changes WHERE site_id=cloudsync_siteid();",
+ -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(stmt);
+ if (rc != SQLITE_ROW) goto finalize;
+ legacy_payload_len = sqlite3_column_bytes(stmt, 0);
+ if (legacy_payload_len <= max_chunk_size) goto finalize; // the monolithic payload must exceed one chunk
+ rc = sqlite3_prepare_v2(db[2], "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_bind_value(apply, 1, sqlite3_column_value(stmt, 0));
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(apply);
+ if (rc != SQLITE_ROW) goto finalize;
+ sqlite3_finalize(apply);
+ apply = NULL;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+
+ if (!test_payload_chunks_tables_equal(db[0], db[2])) goto finalize;
+ // cloudsync_payload_blob_checked(0, 0, NULL, 0, 10000000) must have the same length as the legacy payload.
+ rc = sqlite3_prepare_v2(db[0], "SELECT length(cloudsync_payload_blob_checked(0, 0, NULL, 0, 10000000));", -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(stmt);
+ if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != legacy_payload_len) goto finalize;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+ // With 999999 as the first argument the result must be NULL (NOTE(review): presumably "no changes after that version" - confirm).
+ rc = sqlite3_prepare_v2(db[0], "SELECT cloudsync_payload_blob_checked(999999, 0, NULL, 0, 10000000) IS NULL;", -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_step(stmt);
+ if (rc != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+ // With the last argument set to 1 the call must fail with SQLITE_ERROR (NOTE(review): presumably a size limit - confirm).
+ rc = sqlite3_prepare_v2(db[0], "SELECT cloudsync_payload_blob_checked(0, 0, NULL, 0, 1);", -1, &stmt, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ERROR) goto finalize;
+ sqlite3_finalize(stmt);
+ stmt = NULL;
+
+ result = true;
+ // Common exit path: report (only db[0]/db[1] messages are printed), then release statements, chunks and connections.
+finalize:
+ if (!result && print_result) {
+ printf("do_test_payload_chunks_large_values error: %s / %s\n",
+ db[0] ? sqlite3_errmsg(db[0]) : "no source db",
+ db[1] ? sqlite3_errmsg(db[1]) : "no dest db");
+ }
+ if (stmt) sqlite3_finalize(stmt);
+ if (apply) sqlite3_finalize(apply);
+ test_payload_chunks_free(chunks, chunk_count);
+
+ for (int i = 0; i < 3; ++i) {
+ if (db[i]) close_db(db[i]);
+ }
+ // When requested, delete each database file together with its -wal and -shm companions.
+ if (cleanup_databases) {
+ for (int i = 0; i < 3; ++i) {
+ char path[256], walpath[300], shmpath[300];
+ do_build_database_path(path, i, timestamp, saved_counter);
+ snprintf(walpath, sizeof(walpath), "%s-wal", path);
+ snprintf(shmpath, sizeof(shmpath), "%s-shm", path);
+ file_delete_internal(path);
+ file_delete_internal(walpath);
+ file_delete_internal(shmpath);
+ }
+ }
+
+ return result;
+}
+
+// Apply every payload chunk produced by `q` (column 0 = payload blob, run on
+// src) into `dst`. Returns SQLITE_OK on success; a failed bind or apply yields SQLITE_ERROR.
+static int test_chunks_apply_all (sqlite3_stmt *q, sqlite3 *dst) {
+    sqlite3_stmt *apply = NULL;
+    int rc = sqlite3_prepare_v2(dst, "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL);
+    if (rc != SQLITE_OK) return rc;
+    while ((rc = sqlite3_step(q)) == SQLITE_ROW) {
+        if (sqlite3_bind_blob(apply, 1, sqlite3_column_blob(q, 0), sqlite3_column_bytes(q, 0), SQLITE_STATIC) != SQLITE_OK ||
+            sqlite3_step(apply) != SQLITE_ROW) { sqlite3_finalize(apply); return SQLITE_ERROR; }
+        sqlite3_reset(apply);
+        sqlite3_clear_bindings(apply);  // drop the SQLITE_STATIC pointer before q steps again
+    }
+    sqlite3_finalize(apply);
+    return (rc == SQLITE_DONE) ? SQLITE_OK : rc;
+}
+
+// True iff table `t` contains exactly the ids listed in expected_csv (ascending, '|'-joined).
+static bool test_chunks_ids_equal (sqlite3 *db, const char *expected_csv) {
+    static const char query[] =
+        "SELECT coalesce(group_concat(id, '|'), '') FROM (SELECT id FROM t ORDER BY id);";
+    sqlite3_stmt *stmt = NULL;
+    bool matches = false;
+    if (sqlite3_prepare_v2(db, query, -1, &stmt, NULL) != SQLITE_OK) return false;
+    const unsigned char *joined = (sqlite3_step(stmt) == SQLITE_ROW) ? sqlite3_column_text(stmt, 0) : NULL;
+    if (joined != NULL) matches = (strcmp((const char *)joined, expected_csv) == 0);
+    sqlite3_finalize(stmt);
+    return matches;
+}
+
+// Runs `sql` on `db` and copies each row's column-0 blob into a malloc'd array returned via out/count.
+static bool test_crdt_extract (sqlite3 *db, const char *sql, test_payload_chunk **out, int *count) {
+    sqlite3_stmt *query = NULL;
+    test_payload_chunk *list = NULL;
+    int used = 0, room = 0, rc;
+    *out = NULL; *count = 0;
+    if (sqlite3_prepare_v2(db, sql, -1, &query, NULL) != SQLITE_OK) return false;
+    while ((rc = sqlite3_step(query)) == SQLITE_ROW) {
+        int nbytes = sqlite3_column_bytes(query, 0);
+        const void *blob = sqlite3_column_blob(query, 0);
+        rc = SQLITE_ERROR;                      // any break below is a failure; a clean pass re-steps
+        if (blob == NULL || nbytes <= 0) break;
+        if (used == room) {
+            int grown = room ? room * 2 : 8;
+            test_payload_chunk *bigger = realloc(list, sizeof(*list) * grown);
+            if (bigger == NULL) break;
+            list = bigger; room = grown;
+        }
+        void *copy = malloc(nbytes);
+        if (copy == NULL) break;
+        memcpy(copy, blob, nbytes);
+        list[used++] = (test_payload_chunk){ .data = copy, .len = nbytes };
+    }
+    sqlite3_finalize(query);
+    if (rc != SQLITE_DONE) { test_payload_chunks_free(list, used); return false; }
+    *out = list; *count = used;
+    return true;
+}
+
+// Applies a single materialized chunk to `db`; true only if prepare, bind and step all succeed.
+static bool test_crdt_apply (sqlite3 *db, const test_payload_chunk *c) {
+    sqlite3_stmt *stmt = NULL;
+    bool applied = false;
+    if (sqlite3_prepare_v2(db, "SELECT cloudsync_payload_apply(?);", -1, &stmt, NULL) == SQLITE_OK && sqlite3_bind_blob(stmt, 1, c->data, c->len, SQLITE_STATIC) == SQLITE_OK)
+        applied = (sqlite3_step(stmt) == SQLITE_ROW);
+    sqlite3_finalize(stmt);
+    return applied;
+}
+
+// True iff row 'r1' of crdt_test has byte-identical a, b and c values in both databases.
+static bool test_crdt_row_equal (sqlite3 *db1, sqlite3 *db2) {
+    static const char query[] = "SELECT a, b, c FROM crdt_test WHERE id='r1';";
+    sqlite3_stmt *lhs = NULL, *rhs = NULL;
+    bool same = false;
+    if (sqlite3_prepare_v2(db1, query, -1, &lhs, NULL) != SQLITE_OK || sqlite3_prepare_v2(db2, query, -1, &rhs, NULL) != SQLITE_OK) goto done;
+    if (sqlite3_step(lhs) != SQLITE_ROW || sqlite3_step(rhs) != SQLITE_ROW) goto done;
+    for (int col = 0; col < 3; ++col) {
+        const int len = sqlite3_column_bytes(lhs, col);
+        if (len != sqlite3_column_bytes(rhs, col)) goto done;
+        if (len <= 0) continue;
+        if (memcmp(sqlite3_column_blob(lhs, col), sqlite3_column_blob(rhs, col), len) != 0) goto done;
+    }
+    same = true;
+done:
+    sqlite3_finalize(lhs);
+    sqlite3_finalize(rhs);
+    return same;
+}
+
+// Copies crdt_test.c of row 'r1' into a malloc'd buffer the caller frees; false if the row is missing, empty, or malloc fails.
+static bool test_crdt_get_c (sqlite3 *db, void **out, int *outlen) {
+    sqlite3_stmt *query = NULL;
+    bool copied = false;
+    *out = NULL; *outlen = 0;
+    if (sqlite3_prepare_v2(db, "SELECT c FROM crdt_test WHERE id='r1';", -1, &query, NULL) != SQLITE_OK) return false;
+    if (sqlite3_step(query) == SQLITE_ROW) {
+        const int size = sqlite3_column_bytes(query, 0);
+        const void *src = sqlite3_column_blob(query, 0);
+        if (size > 0 && src && (*out = malloc(size)) != NULL) { memcpy(*out, src, size); *outlen = size; copied = true; }
+    }
+    sqlite3_finalize(query);
+    return copied;
+}
+
+// True iff `sql` prepares on db and column 0 of its first result row reads as integer 1.
+static bool test_crdt_scalar_true (sqlite3 *db, const char *sql) {
+    sqlite3_stmt *query = NULL;
+    const bool is_one = sqlite3_prepare_v2(db, sql, -1, &query, NULL) == SQLITE_OK
+                     && sqlite3_step(query) == SQLITE_ROW && sqlite3_column_int(query, 0) == 1;
+    sqlite3_finalize(query);    // no-op when prepare failed and left query NULL
+    return is_one;
+}
+
+// CRDT convergence under chunked / fragmented, non-atomic, interleaved apply.
+// Two sites concurrently change a small column and a big (fragmented) column of the
+// same row, each in ONE transaction (one db_version, several seq). Both sites' chunks
+// are applied to three targets in different orders (A-then-B, B-then-A, interleaved).
+// Column-level LWW must make all three converge to the identical row: the mix
+// a=from-A / b=from-B, with the big value being one site's value intact — fragment
+// value_ids are per (site_id, col_version, checksum), so fragments never cross-mix.
+bool do_test_payload_chunks_crdt_convergence (bool print_result, bool cleanup_databases) {
+ enum { SRC = 0, SITEA = 1, SITEB = 2, TGT1 = 3, TGT2 = 4, TGT3 = 5, NDB = 6 };
+ sqlite3 *db[NDB] = {0};
+ test_payload_chunk *base = NULL, *ca = NULL, *cb = NULL;
+ int nbase = 0, na = 0, nb = 0;
+ void *cA = NULL, *cB = NULL, *cT = NULL; int lA = 0, lB = 0, lT = 0;
+ bool result = false;
+ int rc = SQLITE_OK;
+ // db[] is sized by NDB so the open/close/cleanup loops below can never run past the array.
+ time_t timestamp = time(NULL);
+ int saved_counter = test_counter++;
+ // Every database gets the same schema, cloudsync enabled on crdt_test, and a 262144-byte chunk cap.
+ for (int i = 0; i < NDB; ++i) {
+ db[i] = do_create_database_file(i, timestamp, saved_counter);
+ if (!db[i]) goto finalize;
+ rc = sqlite3_exec(db[i],
+ "CREATE TABLE crdt_test (id TEXT PRIMARY KEY, a TEXT DEFAULT '', b TEXT DEFAULT '', c BLOB DEFAULT x'');"
+ "SELECT cloudsync_init('crdt_test');"
+ "SELECT cloudsync_set('payload_max_chunk_size', '262144');",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ }
+
+ // Baseline row on SRC, propagated to every other db so they share col_version 1.
+ rc = sqlite3_exec(db[SRC],
+ "INSERT INTO crdt_test(id, a, b, c) VALUES ('r1', 'base_a', 'base_b', zeroblob(8));",
+ NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ if (!test_crdt_extract(db[SRC],
+ "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 ORDER BY chunk_index;",
+ &base, &nbase) || nbase <= 0) goto finalize;
+ for (int i = SITEA; i < NDB; ++i)
+ for (int k = 0; k < nbase; ++k)
+ if (!test_crdt_apply(db[i], &base[k])) goto finalize;
+
+ // Two sites change a small col + the big (fragmented) col, each in ONE transaction.
+ rc = sqlite3_exec(db[SITEA], "UPDATE crdt_test SET a='AAA_from_A', c=randomblob(720000) WHERE id='r1';", NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_exec(db[SITEB], "UPDATE crdt_test SET b='BBB_from_B', c=randomblob(720000) WHERE id='r1';", NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ // Collect each site's changes made after the baseline (since_db_version=1) as chunk lists ca / cb.
+ if (!test_crdt_extract(db[SITEA],
+ "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=1 ORDER BY chunk_index;", &ca, &na)) goto finalize;
+ if (!test_crdt_extract(db[SITEB],
+ "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=1 ORDER BY chunk_index;", &cb, &nb)) goto finalize;
+ // The big value must actually fragment across chunks, else this test proves nothing.
+ if (na < 2 || nb < 2) goto finalize;
+
+ // TGT1: all A then all B.
+ for (int k = 0; k < na; ++k) if (!test_crdt_apply(db[TGT1], &ca[k])) goto finalize;
+ for (int k = 0; k < nb; ++k) if (!test_crdt_apply(db[TGT1], &cb[k])) goto finalize;
+ // TGT2: all B then all A (opposite order).
+ for (int k = 0; k < nb; ++k) if (!test_crdt_apply(db[TGT2], &cb[k])) goto finalize;
+ for (int k = 0; k < na; ++k) if (!test_crdt_apply(db[TGT2], &ca[k])) goto finalize;
+ // TGT3: apply the combined A+B chunk list in a scrambled order (odd combined
+ // indices, then even) — an arbitrary cross-site interleave that also delivers
+ // each site's value fragments out of order. Any permutation must converge: the
+ // merge is commutative and fragments stage by (value_id, part_index), completing
+ // regardless of arrival order.
+ for (int pass = 1; pass >= 0; --pass)
+ for (int j = 0, total = na + nb; j < total; ++j) {
+ if ((j & 1) != pass) continue;
+ const test_payload_chunk *c = (j < na) ? &ca[j] : &cb[j - na];
+ if (!test_crdt_apply(db[TGT3], c)) goto finalize;
+ }
+
+ // (1) Convergence: all three targets hold the identical row regardless of order.
+ if (!test_crdt_row_equal(db[TGT1], db[TGT2])) goto finalize;
+ if (!test_crdt_row_equal(db[TGT1], db[TGT3])) goto finalize;
+
+ // (2) The row is the column-level LWW mix: a from A, b from B.
+ if (!test_crdt_scalar_true(db[TGT1], "SELECT a='AAA_from_A' AND b='BBB_from_B' FROM crdt_test WHERE id='r1';")) goto finalize;
+
+ // (3) The big value is ONE site's value, fully reassembled and intact — fragments
+ // from the two sites never cross-contaminate (distinct value_ids).
+ if (!test_crdt_get_c(db[SITEA], &cA, &lA)) goto finalize;
+ if (!test_crdt_get_c(db[SITEB], &cB, &lB)) goto finalize;
+ if (!test_crdt_get_c(db[TGT1], &cT, &lT)) goto finalize;
+ if (lT != 720000) goto finalize;
+ if ((lT == lA && memcmp(cT, cA, lT) == 0) == (lT == lB && memcmp(cT, cB, lT) == 0)) goto finalize; // exactly one
+
+ result = true;
+ // Common exit path: report (only db[SRC]'s message is printed), then release buffers, chunk lists and connections.
+finalize:
+ if (!result && print_result)
+ printf("do_test_payload_chunks_crdt_convergence error: %s\n", db[SRC] ? sqlite3_errmsg(db[SRC]) : "no db");
+ free(cA); free(cB); free(cT);
+ test_payload_chunks_free(base, nbase);
+ test_payload_chunks_free(ca, na);
+ test_payload_chunks_free(cb, nb);
+ for (int i = 0; i < NDB; ++i) if (db[i]) close_db(db[i]);
+ if (cleanup_databases) {
+ for (int i = 0; i < NDB; ++i) {
+ char path[256], walpath[300], shmpath[300];
+ do_build_database_path(path, i, timestamp, saved_counter);
+ snprintf(walpath, sizeof(walpath), "%s-wal", path);
+ snprintf(shmpath, sizeof(shmpath), "%s-shm", path);
+ file_delete_internal(path);
+ file_delete_internal(walpath);
+ file_delete_internal(shmpath);
+ }
+ }
+ return result;
+}
+
+bool do_test_payload_chunks_site_exclusion (bool print_result, bool cleanup_databases) {
+ sqlite3 *db[4] = {NULL, NULL, NULL, NULL};
+ sqlite3_stmt *stmt = NULL;
+ bool result = false;
+ int rc = SQLITE_OK;
+ unsigned char s1[16] = {0};
+ // Checks site_id filtering of cloudsync_payload_chunks (exclusive and inclusive) plus the related error cases.
+ time_t timestamp = time(NULL);
+ int saved_counter = test_counter++;
+ // Four databases with the same table `t`, cloudsync enabled on each.
+ for (int i = 0; i < 4; ++i) {
+ db[i] = do_create_database_file(i, timestamp, saved_counter);
+ if (!db[i]) goto finalize;
+ rc = sqlite3_exec(db[i],
+ "CREATE TABLE t (id TEXT PRIMARY KEY, note TEXT DEFAULT '');"
+ "SELECT cloudsync_init('t');", NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ }
+
+ // db[0] = local site S0 changes; db[1] = remote site S1 changes
+ rc = sqlite3_exec(db[0], "INSERT INTO t(id, note) VALUES ('a0','n'),('b0','n');", NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+ rc = sqlite3_exec(db[1], "INSERT INTO t(id, note) VALUES ('a1','n'),('b1','n');", NULL, NULL, NULL);
+ if (rc != SQLITE_OK) goto finalize;
+
+ // capture S1 (db[1]'s site id)
+ if (sqlite3_prepare_v2(db[1], "SELECT cloudsync_siteid();", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_bytes(stmt, 0) != 16) goto finalize;
+ memcpy(s1, sqlite3_column_blob(stmt, 0), 16);
+ sqlite3_finalize(stmt); stmt = NULL;
+
+ // transfer db[1]'s changes into db[0] (now db[0] has S0 and S1 changes)
+ if (sqlite3_prepare_v2(db[1], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (test_chunks_apply_all(stmt, db[0]) != SQLITE_OK) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+ if (!test_chunks_ids_equal(db[0], "a0|a1|b0|b1")) goto finalize;
+
+ // exclude S1 -> only S0 changes (a0,b0) into db[2]
+ if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 AND site_id=? AND exclude_filter_site_id=1 ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC) != SQLITE_OK) goto finalize;
+ if (test_chunks_apply_all(stmt, db[2]) != SQLITE_OK) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+ if (!test_chunks_ids_equal(db[2], "a0|b0")) goto finalize;
+
+ // inclusive filter S1 -> only S1 changes (a1,b1) into db[3]
+ if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE since_db_version=0 AND site_id=? ORDER BY chunk_index;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC) != SQLITE_OK) goto finalize;
+ if (test_chunks_apply_all(stmt, db[3]) != SQLITE_OK) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+ if (!test_chunks_ids_equal(db[3], "a1|b1")) goto finalize;
+
+ // exclude=true without a site_id must error
+ if (sqlite3_prepare_v2(db[0], "SELECT payload FROM cloudsync_payload_chunks WHERE exclude_filter_site_id=1;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ERROR) goto finalize; // expected: xFilter raises an error
+ sqlite3_finalize(stmt); stmt = NULL;
+ // cloudsync_payload_blob_checked with S1 bound: non-empty with the 4th argument 1 or 0, NULL when the 1st is 999999.
+ if (sqlite3_prepare_v2(db[0],
+ "SELECT length(cloudsync_payload_blob_checked(0, 0, ?1, 1, 1000000)) > 0 "
+ "AND length(cloudsync_payload_blob_checked(0, 0, ?1, 0, 1000000)) > 0 "
+ "AND cloudsync_payload_blob_checked(999999, 0, ?1, 1, 1000000) IS NULL;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC) != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+ // 4th argument 1 with a NULL site id must fail (NOTE(review): presumably the scalar twin of the exclude-without-site_id error - confirm).
+ if (sqlite3_prepare_v2(db[0], "SELECT cloudsync_payload_blob_checked(0, 0, NULL, 1, 1000000);", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ERROR) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+
+ // UUID text<->blob roundtrip (dashed and undashed) must recover S1
+ if (sqlite3_prepare_v2(db[0],
+ "SELECT cloudsync_uuid_blob(cloudsync_uuid_text(?1)) = ?1 "
+ "AND cloudsync_uuid_blob(cloudsync_uuid_text(?1, 0)) = ?1 "
+ "AND length(cloudsync_uuid_text(?1)) = 36 "
+ "AND length(cloudsync_uuid_text(?1, 0)) = 32;", -1, &stmt, NULL) != SQLITE_OK) goto finalize;
+ if (sqlite3_bind_blob(stmt, 1, s1, 16, SQLITE_STATIC) != SQLITE_OK) goto finalize;
+ if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 1) goto finalize;
+ sqlite3_finalize(stmt); stmt = NULL;
+
+ result = true;
+ // Common exit path: report (only db[0]'s message is printed), then release the statement and connections.
+finalize:
+ if (!result && print_result) {
+ printf("do_test_payload_chunks_site_exclusion error: %s\n", db[0] ? sqlite3_errmsg(db[0]) : "no db");
+ }
+ if (stmt) sqlite3_finalize(stmt);
+ for (int i = 0; i < 4; ++i) if (db[i]) close_db(db[i]);
+ if (cleanup_databases) {
+ for (int i = 0; i < 4; ++i) {
+ char path[256], walpath[300], shmpath[300];
+ do_build_database_path(path, i, timestamp, saved_counter);
+ snprintf(walpath, sizeof(walpath), "%s-wal", path);
+ snprintf(shmpath, sizeof(shmpath), "%s-shm", path);
+ file_delete_internal(path);
+ file_delete_internal(walpath);
+ file_delete_internal(shmpath);
+ }
+ }
+ return result;
+}
+
+// Fetch the persisted receive cursor ('check_dbversion' in cloudsync_settings): 0 when the key is unset, -1 on failure.
+static int64_t test_read_check_dbversion (sqlite3 *db) {
+    const char *query = "SELECT coalesce((SELECT value FROM cloudsync_settings WHERE key='check_dbversion'),'0');";
+    sqlite3_stmt *vm = NULL;
+    int64_t cursor = -1;    // stays -1 unless a row is actually read
+    int rc = sqlite3_prepare_v2(db, query, -1, &vm, NULL);
+    if (rc == SQLITE_OK && sqlite3_step(vm) == SQLITE_ROW) cursor = sqlite3_column_int64(vm, 0);
+    if (vm != NULL) sqlite3_finalize(vm);
+    return cursor;
+}
+
+// Compare split_test(id, body) across two connections row by row (ordered by id); NULL and '' count as different.
+static bool test_split_tables_equal (sqlite3 *src, sqlite3 *dst) {
+    sqlite3_stmt *s1 = NULL, *s2 = NULL;
+    bool result = false;    // any prepare/step error or mismatch leaves this false
+    const char *sql = "SELECT id, body FROM split_test ORDER BY id;";
+    if (sqlite3_prepare_v2(src, sql, -1, &s1, NULL) != SQLITE_OK) goto done;
+    if (sqlite3_prepare_v2(dst, sql, -1, &s2, NULL) != SQLITE_OK) goto done;
+    while (1) {
+        int rc1 = sqlite3_step(s1);
+        int rc2 = sqlite3_step(s2);
+        if (rc1 != rc2) goto done;              // different row count, or an error on one side only
+        if (rc1 == SQLITE_DONE) break;          // both sides exhausted together
+        if (rc1 != SQLITE_ROW) goto done;       // same error code on both sides is still a failure
+        for (int col = 0; col < 2; ++col) {
+            bool null1 = (sqlite3_column_type(s1, col) == SQLITE_NULL);  // read the type before any text conversion
+            bool null2 = (sqlite3_column_type(s2, col) == SQLITE_NULL);  // NULL and '' both report 0 bytes
+            const void *v1 = sqlite3_column_text(s1, col);               // text first, then bytes (order SQLite recommends)
+            const void *v2 = sqlite3_column_text(s2, col);
+            int n1 = sqlite3_column_bytes(s1, col), n2 = sqlite3_column_bytes(s2, col);
+            if (null1 != null2 || n1 != n2 || (n1 > 0 && (!v1 || !v2 || memcmp(v1, v2, (size_t)n1) != 0))) goto done;
+        }
+    }
+    result = true;
+done:
+    if (s1) sqlite3_finalize(s1);
+    if (s2) sqlite3_finalize(s2);
+    return result;
+}
+
+// Reproduces the chunked-download data-loss bug: a single source db_version split across >= 2 v2
+// transport chunks. Applying only the first chunk must NOT advance the durable receive cursor into
+// the middle of that db_version, and a re-check (from the unchanged cursor) must re-deliver every row.
+// Returns true on success; print_result prints diagnostics on failure, cleanup_databases removes the db files.
+bool do_test_payload_chunks_split_dbversion (bool print_result, bool cleanup_databases) {
+    sqlite3 *db[2] = {NULL, NULL};          // db[0] = source, db[1] = receiver
+    sqlite3_stmt *stmt = NULL;
+    cloudsync_context *ctx = NULL;
+    test_payload_chunk *chunks = NULL;
+    int chunk_count = 0, chunk_cap = 0;
+    int64_t split_db_version = -1;          // db_version_min of the first chunk seen
+    int64_t watermark = -1;                 // watermark_db_version of the last chunk seen
+    bool single_db_version = true;
+    bool result = false;
+    int rc = SQLITE_OK;
+
+    time_t timestamp = time(NULL);
+    int saved_counter = test_counter++;
+
+    for (int i = 0; i < 2; ++i) {
+        db[i] = do_create_database_file(i, timestamp, saved_counter);
+        if (!db[i]) goto finalize;
+        rc = sqlite3_exec(db[i],
+            "CREATE TABLE split_test (id TEXT PRIMARY KEY, body TEXT DEFAULT '');"
+            "SELECT cloudsync_init('split_test');",
+            NULL, NULL, NULL);
+        if (rc != SQLITE_OK) goto finalize;
+    }
+
+    // A single CTE INSERT is one transaction -> one source db_version. With a
+    // small chunk budget the ~500 medium rows split across multiple v2 chunks
+    // (every value is small, so no v3 fragments are produced).
+    rc = sqlite3_exec(db[0],
+        "SELECT cloudsync_set('payload_max_chunk_size', '262144');"
+        "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < 500) "
+        "INSERT INTO split_test(id, body) SELECT printf('row-%04d', i), hex(randomblob(700)) FROM c;",
+        NULL, NULL, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    // Collect the transport chunks for the whole window (since_db_version=0).
+    rc = sqlite3_prepare_v2(db[0],
+        "SELECT payload, db_version_min, db_version_max, watermark_db_version "
+        "FROM cloudsync_payload_chunks WHERE since_db_version=0 ORDER BY chunk_index;",
+        -1, &stmt, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+    while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
+        int len = sqlite3_column_bytes(stmt, 0);
+        const void *payload = sqlite3_column_blob(stmt, 0);
+        int64_t dbv_min = sqlite3_column_int64(stmt, 1);
+        int64_t dbv_max = sqlite3_column_int64(stmt, 2);
+        int64_t wm = sqlite3_column_int64(stmt, 3);
+        if (!payload || len < 5) goto finalize;     // at least 5 bytes are needed to read the version byte below
+        // every chunk must be a plain v2 payload (version byte at offset 4)
+        if (((const unsigned char *)payload)[4] != 2) goto finalize;
+        if (split_db_version < 0) split_db_version = dbv_min;
+        watermark = wm;
+        if (dbv_min != split_db_version || dbv_max != split_db_version) single_db_version = false;
+
+        if (chunk_count == chunk_cap) {
+            int new_cap = chunk_cap ? chunk_cap * 2 : 8;
+            test_payload_chunk *nc = realloc(chunks, sizeof(*chunks) * new_cap);
+            if (!nc) goto finalize;                 // chunks is still valid and freed in finalize
+            memset(nc + chunk_cap, 0, sizeof(*chunks) * (new_cap - chunk_cap));
+            chunks = nc;
+            chunk_cap = new_cap;
+        }
+        chunks[chunk_count].data = malloc(len);     // private copy: the column blob does not outlive the step
+        if (!chunks[chunk_count].data) goto finalize;
+        memcpy(chunks[chunk_count].data, payload, len);
+        chunks[chunk_count].len = len;
+        ++chunk_count;
+    }
+    if (rc != SQLITE_DONE) goto finalize;
+    sqlite3_finalize(stmt); stmt = NULL;
+
+    // Preconditions for the scenario: a single db_version split into >= 2 chunks,
+    // and the watermark equals that db_version.
+    if (chunk_count < 2 || !single_db_version || watermark != split_db_version) goto finalize;
+
+    // The server's chunk vtab resumes on db_version > since (no seq cursor). So
+    // advancing the cursor to split_db_version mid-stream would make the next
+    // /check (since=split_db_version) return NOTHING -> the un-applied chunks are
+    // lost. Prove that here:
+    rc = sqlite3_prepare_v2(db[0],
+        "SELECT count(*) FROM cloudsync_payload_chunks WHERE since_db_version=?;", -1, &stmt, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+    if (sqlite3_bind_int64(stmt, 1, split_db_version) != SQLITE_OK) goto finalize;
+    if (sqlite3_step(stmt) != SQLITE_ROW || sqlite3_column_int(stmt, 0) != 0) goto finalize;
+    sqlite3_finalize(stmt); stmt = NULL;
+
+    // The chunked-download receive cursor is controlled at the C level (the
+    // public cloudsync_payload_apply() SQL function stays single-argument). Drive
+    // cloudsync_payload_apply() directly on an initialized context to exercise the
+    // checkpoint gating exactly as the network /check path does.
+    ctx = cloudsync_context_create(db[1]);
+    if (!ctx || cloudsync_context_init(ctx) == NULL) goto finalize;
+
+    // --- Apply ONLY the first chunk as a NON-FINAL chunk (checkpoint NONE) ---
+    {
+        int n = 0;
+        if (cloudsync_payload_apply(ctx, chunks[0].data, chunks[0].len, &n, CLOUDSYNC_CHECKPOINT_NONE, 0) != DBRES_OK) goto finalize;
+    }
+
+    // Cursor MUST NOT have advanced into the incomplete db_version.
+    if (test_read_check_dbversion(db[1]) != 0) goto finalize;
+    // ...but the first chunk's rows are applied.
+    {
+        sqlite3_stmt *cnt = NULL;
+        int n = -1;
+        if (sqlite3_prepare_v2(db[1], "SELECT count(*) FROM split_test;", -1, &cnt, NULL) == SQLITE_OK &&
+            sqlite3_step(cnt) == SQLITE_ROW) n = sqlite3_column_int(cnt, 0);
+        if (cnt) sqlite3_finalize(cnt);
+        if (n <= 0) goto finalize;
+    }
+
+    // --- Re-check: cursor is still 0, so the server re-delivers the whole window.
+    // Apply every chunk; the first is re-applied (idempotent). The final chunk
+    // carries the watermark checkpoint, advancing the cursor only now. ---
+    for (int i = 0; i < chunk_count; ++i) {
+        bool final_chunk = (i == chunk_count - 1);
+        int n = 0;
+        int64_t checkpoint = final_chunk ? watermark : CLOUDSYNC_CHECKPOINT_NONE;
+        if (cloudsync_payload_apply(ctx, chunks[i].data, chunks[i].len, &n, checkpoint, 0) != DBRES_OK) goto finalize;
+    }
+
+    // No data lost: destination matches source exactly.
+    if (!test_split_tables_equal(db[0], db[1])) goto finalize;
+    // Cursor advanced to the watermark only after the full stream was applied.
+    if (test_read_check_dbversion(db[1]) != watermark) goto finalize;
+
+    result = true;
+
+finalize:
+    if (!result && print_result) {
+        printf("do_test_payload_chunks_split_dbversion error: %s / %s (chunks=%d, split_dbv=%lld, watermark=%lld, single=%d)\n",
+               db[0] ? sqlite3_errmsg(db[0]) : "no src", db[1] ? sqlite3_errmsg(db[1]) : "no dst",
+               chunk_count, (long long)split_db_version, (long long)watermark, (int)single_db_version);
+    }
+    if (ctx) cloudsync_context_free(ctx);       // released before the connections are closed
+    if (stmt) sqlite3_finalize(stmt);
+    test_payload_chunks_free(chunks, chunk_count);
+    for (int i = 0; i < 2; ++i) if (db[i]) close_db(db[i]);
+    if (cleanup_databases) {
+        for (int i = 0; i < 2; ++i) {
+            char path[256], walpath[300], shmpath[300];
+            do_build_database_path(path, i, timestamp, saved_counter);
+            snprintf(walpath, sizeof(walpath), "%s-wal", path);
+            snprintf(shmpath, sizeof(shmpath), "%s-shm", path);
+            file_delete_internal(path);
+            file_delete_internal(walpath);
+            file_delete_internal(shmpath);
+        }
+    }
+    return result;
+}
+
+// Proves the positional-cursor resume of cloudsync_payload_chunks: paging the
+// window one chunk per call with an O(1) (db_version, seq, frag_offset) seek
+// yields byte-identical chunks to a single full-window scan. The dataset mixes a
+// db_version split across chunks (row-boundary resumes, incl. resumes landing
+// INSIDE a single committed version that the old since>db_version cursor could not
+// express) with a value larger than the chunk budget (mid-fragment resumes).
+// Part 2 is end-to-end: the positionally-drained stream is applied to a fresh
+// receiver and its table content is compared to the source (drain -> apply ->
+// faithful replica), the real path the /check job will use. Returns true on success.
+bool do_test_payload_chunks_positional_resume (bool print_result, bool cleanup_databases) {
+    sqlite3 *db = NULL;                     // source connection
+    sqlite3 *db2 = NULL;                    // fresh receiver used by the end-to-end part
+    sqlite3_stmt *stmt = NULL;
+    sqlite3_stmt *apply = NULL;
+    test_payload_chunk *base = NULL; int base_count = 0, base_cap = 0;  // full-window scan
+    test_payload_chunk *pos = NULL; int pos_count = 0, pos_cap = 0;     // one-chunk-per-call drain
+    int64_t watermark = -1;
+    bool result = false;
+    int rc = SQLITE_OK;
+
+    time_t timestamp = time(NULL);
+    int saved_counter = test_counter++;
+
+    db = do_create_database_file(0, timestamp, saved_counter);
+    if (!db) goto finalize;
+    rc = sqlite3_exec(db,
+        "CREATE TABLE split_test (id TEXT PRIMARY KEY, body TEXT DEFAULT '');"
+        "SELECT cloudsync_init('split_test');"
+        "SELECT cloudsync_set('payload_max_chunk_size', '262144');",
+        NULL, NULL, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    // tx1: ~500 medium rows in one transaction -> one db_version split across
+    // several v2 chunks (row-boundary resumes within a single version).
+    rc = sqlite3_exec(db,
+        "WITH RECURSIVE c(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM c WHERE i < 500) "
+        "INSERT INTO split_test(id, body) SELECT printf('row-%04d', i), hex(randomblob(700)) FROM c;",
+        NULL, NULL, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    // tx2: one value far larger than the chunk budget -> v3 fragments across
+    // several chunks (mid-fragment resumes inside a single value).
+    rc = sqlite3_exec(db,
+        "INSERT INTO split_test(id, body) VALUES ('big', hex(randomblob(900000)));",
+        NULL, NULL, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    // Baseline: every chunk of the whole window, in order, via the legacy scan.
+    rc = sqlite3_prepare_v2(db,
+        "SELECT payload, watermark_db_version FROM cloudsync_payload_chunks "
+        "WHERE since_db_version=0 ORDER BY chunk_index;", -1, &stmt, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+    while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) {
+        int len = sqlite3_column_bytes(stmt, 0);
+        const void *payload = sqlite3_column_blob(stmt, 0);
+        if (!payload || len <= 0) goto finalize;
+        watermark = sqlite3_column_int64(stmt, 1);  // the last chunk's value is the one kept
+        if (base_count == base_cap) {
+            int nc = base_cap ? base_cap * 2 : 8;
+            test_payload_chunk *t = realloc(base, sizeof(*t) * nc);
+            if (!t) goto finalize;                  // base is still valid and freed in finalize
+            memset(t + base_cap, 0, sizeof(*t) * (nc - base_cap));
+            base = t; base_cap = nc;
+        }
+        base[base_count].data = malloc(len);        // private copy: the column blob does not outlive the step
+        if (!base[base_count].data) goto finalize;
+        memcpy(base[base_count].data, payload, len);
+        base[base_count].len = len;
+        ++base_count;
+    }
+    if (rc != SQLITE_DONE) goto finalize;
+    sqlite3_finalize(stmt); stmt = NULL;
+
+    // Scenario must actually exercise multiple chunks (and thus resumes).
+    if (base_count < 4 || watermark <= 0) goto finalize;
+
+    // Positional drain: one chunk per call, seeking to the cursor the previous
+    // chunk reported. until is the frozen watermark from the baseline.
+    rc = sqlite3_prepare_v2(db,
+        "SELECT payload, next_db_version, next_seq, next_frag_offset, is_final "
+        "FROM cloudsync_payload_chunks "
+        "WHERE until_db_version=?1 AND resume_db_version=?2 AND resume_seq=?3 AND resume_frag_offset=?4 "
+        "LIMIT 1;", -1, &stmt, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    int64_t rdbv = 0, rseq = 0, rfrag = 0;
+    bool done = false;
+    bool saw_frag_resume = false; // a follow-up call actually resumed mid-value
+    // Hard cap guards against a resume bug looping forever.
+    for (int guard = 0; !done && guard <= base_count + 2; ++guard) {
+        if (rfrag > 0) saw_frag_resume = true;
+        sqlite3_reset(stmt);
+        if (sqlite3_bind_int64(stmt, 1, watermark) != SQLITE_OK) goto finalize;
+        if (sqlite3_bind_int64(stmt, 2, rdbv) != SQLITE_OK) goto finalize;
+        if (sqlite3_bind_int64(stmt, 3, rseq) != SQLITE_OK) goto finalize;
+        if (sqlite3_bind_int64(stmt, 4, rfrag) != SQLITE_OK) goto finalize;
+        rc = sqlite3_step(stmt);
+        if (rc != SQLITE_ROW) goto finalize; // every step before is_final must yield a chunk
+        int len = sqlite3_column_bytes(stmt, 0);
+        const void *payload = sqlite3_column_blob(stmt, 0);
+        if (!payload || len <= 0) goto finalize;
+        rdbv = sqlite3_column_int64(stmt, 1);       // cursor for the next call
+        rseq = sqlite3_column_int64(stmt, 2);
+        rfrag = sqlite3_column_int64(stmt, 3);
+        done = sqlite3_column_int(stmt, 4) != 0;
+        if (pos_count == pos_cap) {
+            int nc = pos_cap ? pos_cap * 2 : 8;
+            test_payload_chunk *t = realloc(pos, sizeof(*t) * nc);
+            if (!t) goto finalize;
+            memset(t + pos_cap, 0, sizeof(*t) * (nc - pos_cap));
+            pos = t; pos_cap = nc;
+        }
+        pos[pos_count].data = malloc(len);
+        if (!pos[pos_count].data) goto finalize;
+        memcpy(pos[pos_count].data, payload, len);
+        pos[pos_count].len = len;
+        ++pos_count;
+    }
+    sqlite3_finalize(stmt); stmt = NULL;
+
+    // The positional drain must terminate exactly on is_final, reproduce the
+    // baseline chunk sequence byte-for-byte, and have actually exercised a
+    // mid-value (fragment) resume — not only row-boundary resumes.
+    if (!done || pos_count != base_count || !saw_frag_resume) goto finalize;
+    for (int i = 0; i < base_count; ++i) {
+        if (pos[i].len != base[i].len) goto finalize;
+        if (memcmp(pos[i].data, base[i].data, base[i].len) != 0) goto finalize;
+    }
+
+    // End-to-end: apply the positionally-drained stream to a fresh receiver and
+    // assert its table content matches the source. This exercises the real /check
+    // path (positional drain -> apply -> faithful replica), not just byte-identity.
+    db2 = do_create_database_file(1, timestamp, saved_counter);
+    if (!db2) goto finalize;
+    rc = sqlite3_exec(db2,
+        "CREATE TABLE split_test (id TEXT PRIMARY KEY, body TEXT DEFAULT '');"
+        "SELECT cloudsync_init('split_test');",
+        NULL, NULL, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+
+    rc = sqlite3_prepare_v2(db2, "SELECT cloudsync_payload_apply(?);", -1, &apply, NULL);
+    if (rc != SQLITE_OK) goto finalize;
+    // Apply in reverse drain order: apply must reassemble v3 fragments and merge
+    // rows independent of transport order.
+    for (int i = pos_count - 1; i >= 0; --i) {
+        rc = sqlite3_bind_blob(apply, 1, pos[i].data, pos[i].len, SQLITE_STATIC);
+        if (rc != SQLITE_OK) goto finalize;
+        rc = sqlite3_step(apply);
+        if (rc != SQLITE_ROW) goto finalize;
+        sqlite3_reset(apply);
+        sqlite3_clear_bindings(apply);
+    }
+    sqlite3_finalize(apply); apply = NULL;
+
+    if (!test_split_tables_equal(db, db2)) goto finalize;
+
+    result = true;
+
+finalize:
+    if (!result && print_result) {  // report both connections: the apply half of the test fails on db2, not db
+        printf("do_test_payload_chunks_positional_resume error: %s / %s (base=%d, pos=%d, watermark=%lld)\n",
+               db ? sqlite3_errmsg(db) : "no db", db2 ? sqlite3_errmsg(db2) : "no dst", base_count, pos_count, (long long)watermark);
+    }
+    if (stmt) sqlite3_finalize(stmt);
+    if (apply) sqlite3_finalize(apply);
+    test_payload_chunks_free(base, base_count);
+    test_payload_chunks_free(pos, pos_count);
+    if (db) close_db(db);
+    if (db2) close_db(db2);
+    if (cleanup_databases) {
+        for (int i = 0; i < 2; ++i) {
+            char path[256], walpath[300], shmpath[300];
+            do_build_database_path(path, i, timestamp, saved_counter);
+            snprintf(walpath, sizeof(walpath), "%s-wal", path);
+            snprintf(shmpath, sizeof(shmpath), "%s-shm", path);
+            file_delete_internal(path);
+            file_delete_internal(walpath);
+            file_delete_internal(shmpath);
+        }
+    }
+    return result;
+}
+
bool do_test_payload_idempotency (int nclients, bool print_result, bool cleanup_databases) {
sqlite3 *db[2] = {NULL, NULL};
bool result = false;
@@ -12388,6 +13381,11 @@ int main (int argc, const char * argv[]) {
result += test_report("Payload Buffer Test (600KB):", do_test_payload_buffer(600 * 1024));
result += test_report("Payload Buffer Test (1MB):", do_test_payload_buffer(1024 * 1024));
result += test_report("Payload Buffer Test (10MB):", do_test_payload_buffer(10 * 1024 * 1024));
+ result += test_report("Payload Chunks Large Values:", do_test_payload_chunks_large_values(print_result, cleanup_databases));
+ result += test_report("Payload Chunks CRDT Convergence:", do_test_payload_chunks_crdt_convergence(print_result, cleanup_databases));
+ result += test_report("Payload Chunks Site Exclusion:", do_test_payload_chunks_site_exclusion(print_result, cleanup_databases));
+ result += test_report("Payload Chunks Split db_version:", do_test_payload_chunks_split_dbversion(print_result, cleanup_databases));
+ result += test_report("Payload Chunks Positional Resume:", do_test_payload_chunks_positional_resume(print_result, cleanup_databases));
// close local database
close_db(db);