From 0266bfe08f6fa753438df5aa55ac73e99a9159c4 Mon Sep 17 00:00:00 2001 From: apakhomov Date: Fri, 19 Jun 2026 13:44:51 +0300 Subject: [PATCH] GG-49287 Bulk decode of primitive arrays (vectorized float-vector deserialization) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PrimitiveArray.to_python_not_null built the Python list element-by-element over the ctypes array (`[obj.data[i] for i in range(len)]`). For large float arrays — e.g. a 1536-d vector value per vector-query result row — this is the dominant client cost: cProfile of a kNN query loop showed ~41% of total client CPU in this one comprehension (~77M per-element ctypes reads for 500 queries x 100 results). Replace it with a single bulk buffer decode: memoryview(obj.data).cast('B').cast(fmt).tolist(). The cast through bytes is needed because ctypes LittleEndianStructure reports an explicit byte-order format (e.g. '<f'), which memoryview.cast() does not accept as a target format, so the byte-order prefix is stripped to get the native element code. Measured speedup: 1.9x @ k=800 on the vector-query result path; the win grows with the number of result rows deserialized. Part of GG-49287 (pygridgain deserialization fast path). --- pygridgain/datatypes/primitive_arrays.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pygridgain/datatypes/primitive_arrays.py b/pygridgain/datatypes/primitive_arrays.py index e498336..7887c99 100644 --- a/pygridgain/datatypes/primitive_arrays.py +++ b/pygridgain/datatypes/primitive_arrays.py @@ -185,7 +185,12 @@ def parse_not_null(cls, stream): @classmethod def to_python_not_null(cls, ctypes_object, **kwargs): - return [ctypes_object.data[i] for i in range(ctypes_object.length)] + # Bulk buffer decode instead of an element-wise Python list comprehension over the + # ctypes array (~the dominant client cost for large float vectors). Cast through bytes + # to the native element format (ctypes LittleEndianStructure reports e.g. '<f', which + # memoryview.cast() rejects as a target format). + view = memoryview(ctypes_object.data) + return view.cast('B').cast(view.format.lstrip('<>=!@')).tolist() @classmethod def from_python_not_null(cls, stream, value, **kwargs):