Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/TiledArray/array_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1044,17 +1044,22 @@ std::shared_ptr<ArrayImpl<Tile, Policy>> make_with_new_trange(
// build each target tile in one pass: a single source lookup per cell
// sizes it and fills it together (no separate all-ranges walk).
const auto outer_range = target_trange.make_tile_range(target_ord);
ArenaToTBuilder<Tile> builder(outer_range);
const std::size_t n = outer_range.volume();
using InnerRange = typename Tile::value_type::range_type;
auto range_fn = [&](std::size_t o) -> InnerRange {
const auto* sc = source_cell_at(outer_range.idx(o));
return (!sc || sc->empty()) ? InnerRange{} : sc->range();
};
Tile tile = arena_outer_init<Tile>(outer_range, 1, range_fn);
for (std::size_t o = 0; o < n; ++o) {
auto& cell = tile.data()[o];
if (cell.empty()) continue; // deliberately-null cell
const auto* sc = source_cell_at(outer_range.idx(o));
if (!sc || sc->empty()) continue; // leaves a deliberately-null cell
auto& cell = builder.emplace(o, sc->range());
const auto* s = sc->data();
auto* d = cell.data();
for (std::size_t p = 0; p < cell.size(); ++p) d[p] = s[p];
}
target_array.set(target_ord, std::move(builder).finish());
target_array.set(target_ord, std::move(tile));
}
target_array.world().gop.fence();
} else {
Expand Down
82 changes: 75 additions & 7 deletions src/TiledArray/tensor/arena_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
#include "TiledArray/tensor/arena_tensor.h"

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <memory_resource>
#include <new>
Expand Down Expand Up @@ -51,6 +54,62 @@ std::shared_ptr<typename OuterTensor::value_type[]> make_outer_data(

} // namespace

/// Single-page invariant check for arena ToT outer tiles, env-gated by
/// `TA_ASSERT_SINGLE_PAGE` (zero overhead when unset). A size-determinable ToT
/// outer tile must occupy at most one arena page; `Arena::page_count() > 1`
/// means an incremental build spilled across pages and the strided-BLAS fast
/// path would silently revert to per-cell AXPY. On violation we print the
/// offending site and throw a TiledArray::Exception so the run fails loudly.
/// When the gate is on, a summary (tiles checked, violations) is printed at
/// process exit (on a clean exit; the throw path skips it). The check uses
/// `page_count()` -- not `classify_run` -- so that it remains valid for tiles
/// with null or non-uniform inner cells (which are still single-page).
/// Reports whether the `TA_ASSERT_SINGLE_PAGE` environment gate is switched on.
///
/// The variable is read once, on the first call; the result is cached for the
/// rest of the process, so later changes to the environment are not observed.
/// The gate counts as on when the variable is set, non-empty, and its first
/// character is not `'0'`.
inline bool arena_single_page_assert_enabled() {
  static const bool enabled = []() -> bool {
    const char* value = std::getenv("TA_ASSERT_SINGLE_PAGE");
    if (value == nullptr) return false;
    const char first = value[0];
    return !(first == '\0' || first == '0');
  }();
  return enabled;
}

/// Process-wide tally of single-page checks performed; starts at zero.
/// Every call returns a reference to the same atomic counter.
inline std::atomic<std::size_t>& arena_single_page_check_count() {
  static std::atomic<std::size_t> checks_performed(0);
  return checks_performed;
}

/// Process-wide tally of single-page violations detected; starts at zero.
/// Every call returns a reference to the same atomic counter.
inline std::atomic<std::size_t>& arena_single_page_violation_count() {
  static std::atomic<std::size_t> violations_seen(0);
  return violations_seen;
}

/// Checks that `arena` reports at most one page; a no-op unless the
/// `TA_ASSERT_SINGLE_PAGE` gate is on.
///
/// \param arena the arena whose `page_count()` is inspected
/// \param where label of the calling site, echoed in the violation message
///
/// Each gated call bumps the check counter. When more than one page is
/// reported, the violation counter is bumped, a diagnostic naming `where` and
/// the page count is written to stderr, and `TA_EXCEPTION` is raised.
inline void arena_assert_single_page(const Arena& arena, const char* where) {
  if (!arena_single_page_assert_enabled()) return;

  // Hook the exit-time summary exactly once, on the first gated call.
  static const bool summary_hooked = [] {
    std::atexit([] {
      const std::size_t checked =
          arena_single_page_check_count().load(std::memory_order_relaxed);
      const std::size_t violations =
          arena_single_page_violation_count().load(std::memory_order_relaxed);
      std::fprintf(
          stderr,
          "[TA_ASSERT_SINGLE_PAGE] checked %zu arena ToT outer tile(s), "
          "%zu multi-page violation(s)\n",
          checked, violations);
    });
    return true;
  }();
  (void)summary_hooked;

  arena_single_page_check_count().fetch_add(1, std::memory_order_relaxed);

  const std::size_t pages = arena.page_count();
  if (pages <= 1) return;  // invariant holds

  // Record and report the violation before failing loudly.
  arena_single_page_violation_count().fetch_add(1, std::memory_order_relaxed);
  std::fprintf(stderr,
               "[TA_ASSERT_SINGLE_PAGE] VIOLATION at %s: arena ToT outer tile "
               "spans %zu pages (expected <= 1)\n",
               where, pages);
  {
    TA_EXCEPTION(
        "TA_ASSERT_SINGLE_PAGE: arena ToT outer tile spans multiple arena "
        "pages -- a size-determinable ToT must be single-page");
  }
}

/// Allocate an arena-backed ToT outer tile with caller-provided inner ranges.
///
/// `inner_range_fn(cell_ordinal)` -> inner `range_type` for each cell ordinal
Expand Down Expand Up @@ -138,6 +197,7 @@ OuterTensor arena_outer_init(
InnerT(r, std::shared_ptr<T[]>(h, reinterpret_cast<T*>(h.get())));
}
}
arena_assert_single_page(*arena_ptr, "arena_outer_init");
return result;
}

Expand Down Expand Up @@ -229,6 +289,7 @@ class ArenaToTBuilder {

/// Completes the build and returns the assembled outer tile. Callable only on
/// an rvalue builder; its data is moved out, so the builder is spent afterwards.
/// The arena is run through the single-page check before the tile is assembled.
OuterTensor finish() && {
  arena_assert_single_page(*arena_, "ArenaToTBuilder::finish");
  OuterTensor assembled(outer_range_, batch_sz_, std::move(data_));
  return assembled;
}

Expand All @@ -250,21 +311,28 @@ class ArenaToTBuilder {
/// to the next -- no separate all-ranges walk. A zero-volume inner range
/// yields a deliberately-null cell, which `inner_fill_fn` is not invoked on.
/// Cells are zero-initialized, so the default no-op fill still leaves zeroed
/// storage. Backed by `ArenaToTBuilder`.
/// storage. Backed by the up-front single-page `arena_outer_init`.
template <typename OuterTensor, typename InnerRangeFn,
typename InnerFillFn = nested_fill_noop>
OuterTensor make_nested_tile(
const typename OuterTensor::range_type& outer_range,
InnerRangeFn&& inner_range_fn, InnerFillFn&& inner_fill_fn = {}) {
ArenaToTBuilder<OuterTensor> builder(outer_range, /*batch_sz=*/1,
/*zero_init=*/true);
// Up-front, single contiguous page: pre-walk ranges (cheap headers only),
// reserve one exact slab, fill in place. inner_range_fn/inner_fill_fn are
// random-access (idx-driven), so there is no single-pass data to buffer and
// no peak-memory doubling -- only the genuinely single-pass init_tiles path
// needs the incremental ArenaToTBuilder.
auto cell_range_fn = [&](std::size_t ord) {
return inner_range_fn(outer_range.idx(ord));
};
OuterTensor result =
arena_outer_init<OuterTensor>(outer_range, 1, cell_range_fn);
const std::size_t N = outer_range.volume();
for (std::size_t ord = 0; ord < N; ++ord) {
const auto idx = outer_range.idx(ord);
auto& cell = builder.emplace(ord, inner_range_fn(idx));
if (!cell.empty()) inner_fill_fn(cell, idx);
auto& cell = result.data()[ord];
if (!cell.empty()) inner_fill_fn(cell, outer_range.idx(ord));
}
return std::move(builder).finish();
return result;
}

/// Apply a unary fill op while preserving each source inner range.
Expand Down
55 changes: 55 additions & 0 deletions tests/tot_construction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ auto inner_range_2d(std::size_t d0, std::size_t d1) {
return typename InnerTile::range_type(std::vector<std::size_t>{d0, d1});
}

// Checks that an arena ToT outer tile is one contiguous page, i.e. its cells
// are uniform in size and laid out at a constant stride with no page jump.
// classify_run is expected to yield 0 for such a clean run and 3 when a page
// jump (multi-page build) is present.
template <typename OuterTile>
static void check_single_page_uniform(const OuterTile& tile) {
  // Accessor handing classify_run the cell stored at a given ordinal.
  const auto cell_at = [&tile](std::size_t ord) -> decltype(auto) {
    return tile.data()[ord];
  };
  const std::size_t cell_count = tile.range().volume();
  const int code = TA::detail::classify_run(cell_at, cell_count);
  BOOST_CHECK_EQUAL(code, 0);  // 0 == clean single-page constant stride
}

template <typename InnerTile>
void verify_cell(const InnerTile& cell, long e, bool expect_filled) {
BOOST_REQUIRE(!cell.empty());
Expand Down Expand Up @@ -778,6 +789,25 @@ BOOST_AUTO_TEST_CASE(arena_tile_bipartite_permute) {
test_arena_tile_permute();
}

// make_nested_tile must produce a single-page outer tile even when the total
// payload is larger than one default arena page.
BOOST_AUTO_TEST_CASE(make_nested_tile_arena_single_page) {
  using Inner = TA::ArenaTensor<double>;
  using OuterTile = TA::Tensor<Inner>;

  // Payload: 256 cells x 64 doubles = 128 KiB, which exceeds the 64 KiB
  // default page. A multi-page builder would spill; a single-page allocator
  // does not.
  const std::size_t ncells = 256;
  const std::size_t isize = 64;
  const TA::Range outer(std::array<std::size_t, 1>{ncells});

  // Every cell gets the same rank-1 inner range of extent isize.
  auto uniform_range = [isize](const TA::Range::index_type&) {
    return Inner::range_type(std::vector<std::size_t>(1, isize));
  };
  // Each cell is filled with 0, 1, 2, ... along its elements.
  auto iota_fill = [](Inner& cell, const TA::Range::index_type&) {
    const std::size_t len = cell.size();
    for (std::size_t p = 0; p != len; ++p) cell[p] = static_cast<double>(p);
  };

  OuterTile tile =
      TA::detail::make_nested_tile<OuterTile>(outer, uniform_range, iota_fill);

  // Sanity-check the arithmetic behind the "larger than a page" premise.
  BOOST_REQUIRE_EQUAL(ncells * isize * sizeof(double), std::size_t(128 * 1024));
  check_single_page_uniform(tile);
}

BOOST_AUTO_TEST_SUITE_END()

BOOST_AUTO_TEST_SUITE(tot_construction_dist_suite, TA_UT_LABEL_DISTRIBUTED)
Expand All @@ -786,4 +816,29 @@ BOOST_AUTO_TEST_CASE(arena_tot_remote_tile_transport) {
test_distributed_arena_tot();
}

// Retiling an arena ToT array must leave every non-zero local destination
// tile as a single page.
BOOST_AUTO_TEST_CASE(retile_arena_tot_single_page) {
  using Inner = TA::ArenaTensor<double>;
  using OuterTile = TA::Tensor<Inner>;
  using Array = TA::DistArray<OuterTile, TA::DensePolicy>;

  auto& world = *GlobalFixture::world;

  // Source: one outer tile holding 256 cells of 64 doubles each
  // (256 x 64 doubles = 128 KiB > page).
  const std::size_t isize = 64;
  TA::TiledRange src_tr{{0, 256}};
  Array src(world, src_tr);
  src.init_tiles_nested(
      [isize](const auto&) {
        return Inner::range_type(std::vector<std::size_t>(1, isize));
      },
      [](Inner& cell, const auto&) {
        const std::size_t len = cell.size();
        for (std::size_t p = 0; p != len; ++p)
          cell[p] = static_cast<double>(p);
      });
  world.gop.fence();

  // Destination: the same extent split at 128 into two outer tiles.
  TA::TiledRange dst_tr{{0, 128, 256}};
  Array dst = TA::retile(src, dst_tr);
  world.gop.fence();

  // Inspect only the tiles in this array's process map that are non-zero.
  for (const auto ord : *dst.pmap()) {
    if (!dst.is_zero(ord)) {
      check_single_page_uniform(dst.find(ord).get());
    }
  }
}

BOOST_AUTO_TEST_SUITE_END()
Loading