From 0266bfe08f6fa753438df5aa55ac73e99a9159c4 Mon Sep 17 00:00:00 2001
From: apakhomov <apkhmv@gmail.com>
Date: Fri, 19 Jun 2026 13:44:51 +0300
Subject: [PATCH] GG-49287 Bulk decode of primitive arrays (vectorized
 float-vector deserialization)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PrimitiveArray.to_python_not_null built the Python list element-by-element over the
ctypes array (`[obj.data[i] for i in range(len)]`). For large float arrays — e.g. a
1536-d vector value per vector-query result row — this is the dominant client cost:
cProfile of a kNN query loop showed ~41% of total client CPU in this one comprehension
(~77M per-element ctypes reads for 500 queries x 100 results).

Replace it with a single bulk buffer decode: memoryview(obj.data).cast('B').cast(fmt).tolist().
The cast through bytes is needed because a ctypes LittleEndianStructure reports an explicit
byte-order format (e.g. '<f') that memoryview.tolist() rejects; we strip the order prefix
and reinterpret the bytes in native order. This is correct only on little-endian platforms,
where native order matches the structure's little-endian layout. The approach is generic
across all primitive array types (float/int/long/...).

Measured (dbpedia-openai 1536-d vector queries, single client, identical results /
key-checksums): speedup of 1.4x at k=10 rising to 1.9x at k=800 on the vector-query result
path; the win grows with the number of result rows deserialized. Part of GG-49287
(pygridgain deserialization fast path).
---
 pygridgain/datatypes/primitive_arrays.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pygridgain/datatypes/primitive_arrays.py b/pygridgain/datatypes/primitive_arrays.py
index e498336..7887c99 100644
--- a/pygridgain/datatypes/primitive_arrays.py
+++ b/pygridgain/datatypes/primitive_arrays.py
@@ -185,7 +185,12 @@ def parse_not_null(cls, stream):
 
     @classmethod
     def to_python_not_null(cls, ctypes_object, **kwargs):
-        return [ctypes_object.data[i] for i in range(ctypes_object.length)]
+        # Bulk-decode the ctypes array through the buffer protocol instead of indexing it
+        # element by element (the dominant client cost for large float vectors). The view is
+        # cast through bytes ('B') to drop the byte-order prefix ctypes reports (e.g. '<f'),
+        # which memoryview.tolist() rejects. NOTE: native-order read; little-endian hosts only.
+        mv = memoryview(ctypes_object.data)
+        return mv.cast('B').cast(mv.format.lstrip('<>=!@')).tolist()
 
     @classmethod
     def from_python_not_null(cls, stream, value, **kwargs):