From 8ec7c3f15da739c1a8d78c1eec1e1f45cbe8ba21 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada
Date: Mon, 19 Dec 2022 14:41:43 +0900
Subject: [PATCH v14 8/9] PoC: calculate memory usage in radix tree.

---
 src/backend/lib/radixtree.c  | 137 +++++++++++++++++++++++------------
 src/backend/utils/mmgr/dsa.c |  51 +++++++++++++
 src/include/utils/dsa.h      |   1 +
 3 files changed, 144 insertions(+), 45 deletions(-)

diff --git a/src/backend/lib/radixtree.c b/src/backend/lib/radixtree.c
index 455071cbab..4ad55a0b7c 100644
--- a/src/backend/lib/radixtree.c
+++ b/src/backend/lib/radixtree.c
@@ -360,14 +360,24 @@ typedef struct rt_size_class_elem
 	const char *name;
 	int			fanout;
 
-	/* slab chunk size */
+	/* node size */
 	Size		inner_size;
 	Size		leaf_size;
 
 	/* slab block size */
-	Size		inner_blocksize;
-	Size		leaf_blocksize;
+	Size		slab_inner_blocksize;
+	Size		slab_leaf_blocksize;
+
+	/*
+	 * For a local tree, GetMemoryChunkSpace() reports the space a node
+	 * really occupies.  DSA has no equivalent, so for a shared tree we cache
+	 * the DSA size-class sizes here; they are filled in lazily by rt_create()
+	 * (NOTE(review): a backend that only attaches must fill them in too).
+	 */
+	Size		dsa_inner_size;
+	Size		dsa_leaf_size;
 } rt_size_class_elem;
+static bool rt_size_class_dsa_info_initialized = false;
 
 /*
  * Calculate the slab blocksize so that we can allocate at least 32 chunks
@@ -381,40 +391,40 @@ static rt_size_class_elem rt_size_class_info[RT_SIZE_CLASS_COUNT] = {
 		.fanout = 4,
 		.inner_size = sizeof(rt_node_inner_4) + 4 * sizeof(rt_node *),
 		.leaf_size = sizeof(rt_node_leaf_4) + 4 * sizeof(uint64),
-		.inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_4) + 4 * sizeof(rt_node *)),
-		.leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_4) + 4 * sizeof(uint64)),
+		.slab_inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_4) + 4 * sizeof(rt_node *)),
+		.slab_leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_4) + 4 * sizeof(uint64)),
 	},
 	[RT_CLASS_32_PARTIAL] = {
 		.name = "radix tree node 15",
 		.fanout = 15,
 		.inner_size = sizeof(rt_node_inner_32) + 15 * sizeof(rt_node *),
 		.leaf_size = sizeof(rt_node_leaf_32) + 15 * sizeof(uint64),
-		.inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_32) + 15 * sizeof(rt_node *)),
-		.leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_32) + 15 * sizeof(uint64)),
+		.slab_inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_32) + 15 * sizeof(rt_node *)),
+		.slab_leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_32) + 15 * sizeof(uint64)),
 	},
 	[RT_CLASS_32_FULL] = {
 		.name = "radix tree node 32",
 		.fanout = 32,
 		.inner_size = sizeof(rt_node_inner_32) + 32 * sizeof(rt_node *),
 		.leaf_size = sizeof(rt_node_leaf_32) + 32 * sizeof(uint64),
-		.inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_32) + 32 * sizeof(rt_node *)),
-		.leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_32) + 32 * sizeof(uint64)),
+		.slab_inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_32) + 32 * sizeof(rt_node *)),
+		.slab_leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_32) + 32 * sizeof(uint64)),
 	},
 	[RT_CLASS_125_FULL] = {
 		.name = "radix tree node 125",
 		.fanout = 125,
 		.inner_size = sizeof(rt_node_inner_125) + 125 * sizeof(rt_node *),
 		.leaf_size = sizeof(rt_node_leaf_125) + 125 * sizeof(uint64),
-		.inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_125) + 125 * sizeof(rt_node *)),
-		.leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_125) + 125 * sizeof(uint64)),
+		.slab_inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_125) + 125 * sizeof(rt_node *)),
+		.slab_leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_125) + 125 * sizeof(uint64)),
 	},
 	[RT_CLASS_256] = {
 		.name = "radix tree node 256",
 		.fanout = 256,
 		.inner_size = sizeof(rt_node_inner_256),
 		.leaf_size = sizeof(rt_node_leaf_256),
-		.inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_256)),
-		.leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_256)),
+		.slab_inner_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_inner_256)),
+		.slab_leaf_blocksize = NODE_SLAB_BLOCK_SIZE(sizeof(rt_node_leaf_256)),
 	},
 };
 
@@ -477,6 +487,12 @@ typedef struct radix_tree_control
 	uint64		max_val;
 	uint64		num_keys;
 
+	/*
+	 * Track the amount of memory used. The callers can ask for it
+	 * with rt_memory_usage().
+	 */
+	uint64		mem_used;
+
 	/* statistics */
 #ifdef RT_DEBUG
 	int32		cnt[RT_SIZE_CLASS_COUNT];
@@ -1005,15 +1021,22 @@ static rt_node_ptr
 rt_alloc_node(radix_tree *tree, rt_size_class size_class, bool inner)
 {
 	rt_node_ptr newnode;
+	Size		size;
 
 	if (RadixTreeIsShared(tree))
 	{
 		dsa_pointer dp;
 
 		if (inner)
+		{
 			dp = dsa_allocate(tree->area, rt_size_class_info[size_class].inner_size);
+			size = rt_size_class_info[size_class].dsa_inner_size;
+		}
 		else
+		{
 			dp = dsa_allocate(tree->area, rt_size_class_info[size_class].leaf_size);
+			size = rt_size_class_info[size_class].dsa_leaf_size;
+		}
 
 		newnode.encoded = (rt_pointer) dp;
 		newnode.decoded = rt_pointer_decode(tree, newnode.encoded);
@@ -1028,8 +1051,12 @@ rt_alloc_node(radix_tree *tree, rt_size_class size_class, bool inner)
 																 rt_size_class_info[size_class].leaf_size);
 
 		newnode.encoded = rt_pointer_encode(newnode.decoded);
+		size = GetMemoryChunkSpace(newnode.decoded);
 	}
 
+	/* update memory usage */
+	tree->ctl->mem_used += size;
+
 #ifdef RT_DEBUG
 	/* update the statistics */
 	tree->ctl->cnt[size_class]++;
@@ -1095,6 +1122,15 @@ rt_grow_node_kind(radix_tree *tree, rt_node_ptr node, uint8 new_kind)
 static void
 rt_free_node(radix_tree *tree, rt_node_ptr node)
 {
+	Size		size;
+	static const int fanout_node_class[RT_NODE_MAX_SLOTS] =
+	{
+		[4] = RT_CLASS_4_FULL,
+		[15] = RT_CLASS_32_PARTIAL,
+		[32] = RT_CLASS_32_FULL,
+		[125] = RT_CLASS_125_FULL,
+	};
+
 	/* If we're deleting the root node, make the tree empty */
 	if (tree->ctl->root == node.encoded)
 	{
@@ -1104,28 +1140,38 @@ rt_free_node(radix_tree *tree, rt_node_ptr node)
 
 #ifdef RT_DEBUG
 	{
-		int			i;
+		int			size_class = (NODE_FANOUT(node) == 0)
+			? RT_CLASS_256
+			: fanout_node_class[NODE_FANOUT(node)];
 
 		/* update the statistics */
-		for (i = 0; i < RT_SIZE_CLASS_COUNT; i++)
-		{
-			if (NODE_FANOUT(node) == rt_size_class_info[i].fanout)
-				break;
-		}
-
-		/* fanout of node256 is intentionally 0 */
-		if (i == RT_SIZE_CLASS_COUNT)
-			i = RT_CLASS_256;
-
-		tree->ctl->cnt[i]--;
-		Assert(tree->ctl->cnt[i] >= 0);
+		tree->ctl->cnt[size_class]--;
+		Assert(tree->ctl->cnt[size_class] >= 0);
 	}
 #endif
 
 	if (RadixTreeIsShared(tree))
+	{
+		int			size_class = (NODE_FANOUT(node) == 0)
+			? RT_CLASS_256
+			: fanout_node_class[NODE_FANOUT(node)];
+
+		if (!NODE_IS_LEAF(node))
+			size = rt_size_class_info[size_class].dsa_inner_size;
+		else
+			size = rt_size_class_info[size_class].dsa_leaf_size;
+
 		dsa_free(tree->area, (dsa_pointer) node.encoded);
+	}
 	else
+	{
+		size = GetMemoryChunkSpace(node.decoded);
 		pfree(node.decoded);
+	}
+
+	/* update memory usage; mem_used is unsigned, so check before subtracting */
+	Assert(tree->ctl->mem_used >= size);
+	tree->ctl->mem_used -= size;
 }
 
 /*
@@ -1837,15 +1883,18 @@ rt_create(MemoryContext ctx, dsa_area *area)
 		dp = dsa_allocate0(area, sizeof(radix_tree_control));
 		tree->ctl = (radix_tree_control *) dsa_get_address(area, dp);
 		tree->ctl->handle = (rt_handle) dp;
+		tree->ctl->mem_used += dsa_get_size_class(sizeof(radix_tree_control));
 	}
 	else
 	{
 		tree->ctl = (radix_tree_control *) palloc0(sizeof(radix_tree_control));
 		tree->ctl->handle = InvalidDsaPointer;
+		tree->ctl->mem_used += GetMemoryChunkSpace(tree->ctl);
 	}
 
 	tree->ctl->magic = RADIXTREE_MAGIC;
 	tree->ctl->root = InvalidRTPointer;
+	tree->ctl->mem_used += GetMemoryChunkSpace(tree);
 
 	/* Create the slab allocator for each size class */
 	if (area == NULL)
@@ -1854,17 +1903,29 @@ rt_create(MemoryContext ctx, dsa_area *area)
 		{
 			tree->inner_slabs[i] = SlabContextCreate(ctx,
 													 rt_size_class_info[i].name,
-													 rt_size_class_info[i].inner_blocksize,
+													 rt_size_class_info[i].slab_inner_blocksize,
 													 rt_size_class_info[i].inner_size);
 			tree->leaf_slabs[i] = SlabContextCreate(ctx,
 													rt_size_class_info[i].name,
-													rt_size_class_info[i].leaf_blocksize,
+													rt_size_class_info[i].slab_leaf_blocksize,
 													rt_size_class_info[i].leaf_size);
 #ifdef RT_DEBUG
 			tree->ctl->cnt[i] = 0;
 #endif
 		}
 	}
+	else if (!rt_size_class_dsa_info_initialized)
+	{
+		for (int i = 0; i < RT_SIZE_CLASS_COUNT; i++)
+		{
+			rt_size_class_info[i].dsa_inner_size =
+				dsa_get_size_class(rt_size_class_info[i].inner_size);
+			rt_size_class_info[i].dsa_leaf_size =
+				dsa_get_size_class(rt_size_class_info[i].leaf_size);
+		}
+
+		rt_size_class_dsa_info_initialized = true;
+	}
 
 	MemoryContextSwitchTo(old_ctx);
 
@@ -2534,22 +2595,8 @@ rt_num_entries(radix_tree *tree)
 uint64
 rt_memory_usage(radix_tree *tree)
 {
-	Size		total = sizeof(radix_tree) + sizeof(radix_tree_control);
-
 	Assert(!RadixTreeIsShared(tree) || tree->ctl->magic == RADIXTREE_MAGIC);
-
-	if (RadixTreeIsShared(tree))
-		total = dsa_get_total_size(tree->area);
-	else
-	{
-		for (int i = 0; i < RT_NODE_KIND_COUNT; i++)
-		{
-			total += MemoryContextMemAllocated(tree->inner_slabs[i], true);
-			total += MemoryContextMemAllocated(tree->leaf_slabs[i], true);
-		}
-	}
-
-	return total;
+	return tree->ctl->mem_used;
 }
 
 /*
@@ -2873,9 +2920,9 @@ rt_dump(radix_tree *tree)
 		fprintf(stderr, "%s\tinner_size %zu\tinner_blocksize %zu\tleaf_size %zu\tleaf_blocksize %zu\n",
 				rt_size_class_info[i].name,
 				rt_size_class_info[i].inner_size,
-				rt_size_class_info[i].inner_blocksize,
+				rt_size_class_info[i].slab_inner_blocksize,
 				rt_size_class_info[i].leaf_size,
-				rt_size_class_info[i].leaf_blocksize);
+				rt_size_class_info[i].slab_leaf_blocksize);
 	fprintf(stderr, "max_val = " UINT64_FORMAT "\n", tree->ctl->max_val);
 
 	if (!RTPointerIsValid(tree->ctl->root))
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index ad169882af..e77aea10e2 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -1208,6 +1208,57 @@ dsa_minimum_size(void)
 	return pages * FPM_PAGE_SIZE;
 }
 
+/*
+ * Return the number of bytes DSA sets aside for a request of 'size' bytes,
+ * mirroring the size-class lookup in dsa_allocate_extended().  Requests
+ * larger than the largest size class are returned unchanged (any page
+ * rounding done for such large allocations is not accounted for here).
+ */
+size_t
+dsa_get_size_class(size_t size)
+{
+	uint16		size_class;
+
+	/* A zero size would index dsa_size_class_map[] at -1 below. */
+	Assert(size > 0);
+
+	if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
+		return size;
+	else if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
+	{
+		int			mapidx;
+
+		/* For smaller sizes we have a lookup table... */
+		mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
+				  DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
+		size_class = dsa_size_class_map[mapidx];
+	}
+	else
+	{
+		uint16		min;
+		uint16		max;
+
+		/* ... and for the rest we search by binary chop. */
+		min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
+		max = lengthof(dsa_size_classes) - 1;
+
+		while (min < max)
+		{
+			uint16		mid = (min + max) / 2;
+			uint16		class_size = dsa_size_classes[mid];
+
+			if (class_size < size)
+				min = mid + 1;
+			else
+				max = mid;
+		}
+
+		size_class = min;
+	}
+
+	return dsa_size_classes[size_class];
+}
+
 /*
  * Workhorse function for dsa_create and dsa_create_in_place.
  */
diff --git a/src/include/utils/dsa.h b/src/include/utils/dsa.h
index dad06adecc..a17c4eb88c 100644
--- a/src/include/utils/dsa.h
+++ b/src/include/utils/dsa.h
@@ -118,6 +118,7 @@ extern dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
 extern void dsa_free(dsa_area *area, dsa_pointer dp);
 extern void *dsa_get_address(dsa_area *area, dsa_pointer dp);
 extern size_t dsa_get_total_size(dsa_area *area);
+extern size_t dsa_get_size_class(size_t size);
 extern void dsa_trim(dsa_area *area);
 extern void dsa_dump(dsa_area *area);
 
-- 
2.31.1