diff --git a/rationai/mlkit/data/datasets/slides_tiles_loader.py b/rationai/mlkit/data/datasets/slides_tiles_loader.py index 7359965..d487ebe 100644 --- a/rationai/mlkit/data/datasets/slides_tiles_loader.py +++ b/rationai/mlkit/data/datasets/slides_tiles_loader.py @@ -76,9 +76,9 @@ def _build_tile_index(tiles: HFDataset) -> dict[str | bytes, pa.ListScalar]: if len(tiles) == 0: return {} - # 1. Grab the column directly from the underlying PyArrow Table - slide_ids = tiles.data.column("slide_id") - num_rows = len(slide_ids) + # 1. Read slide_id as Arrow data, respecting any prior .filter() / .select(). + slide_ids = tiles.with_format("arrow")["slide_id"] + num_rows = len(tiles) # 2. Handle the "Large" type conversion current_type = slide_ids.type diff --git a/uv.lock b/uv.lock index 6c5d097..cb69bbf 100644 --- a/uv.lock +++ b/uv.lock @@ -2287,7 +2287,7 @@ dependencies = [ [[package]] name = "rationai-mlkit" -version = "0.4.0" +version = "0.4.1" source = { virtual = "." } dependencies = [ { name = "datasets" },