From c8557c827481a0ec6f6ff5cecdf2299a5743d052 Mon Sep 17 00:00:00 2001
From: Nikita Malakhov
Date: Thu, 14 Mar 2024 09:12:32 +0300
Subject: [PATCH] [WIP] Shared detoast datum patch v2 with toast slices caching
 added

diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 3547cdba56..8beb5f8e68 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -20,6 +20,8 @@
 #include "common/int.h"
 #include "common/pg_lzcompress.h"
 #include "utils/expandeddatum.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
 #include "utils/rel.h"
 
 static struct varlena *toast_fetch_datum(struct varlena *attr);
@@ -29,6 +31,51 @@ static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
 static struct varlena *toast_decompress_datum(struct varlena *attr);
 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
 
+/* GUC: cache detoasted values for the lifetime of a transaction */
+bool		enable_toast_cache = false;
+
+/* Per-transaction cache state, released by AtEOXact_ToastCache() */
+static HTAB *toastCache = NULL;
+static MemoryContext ToastCacheContext = NULL;
+static MemoryContext ToastSliceContext = NULL;
+
+static uint64 toast_cache_hits = 0;
+static uint64 toast_cache_misses = 0;
+
+/* Hash key: identifies one out-of-line value */
+typedef struct toast_cache_key
+{
+	Oid			toastrelid;
+	Oid			valueid;
+} toast_cache_key;
+
+/*
+ * One cached value: either "data" holds the complete raw payload, or "data"
+ * is NULL and "slices" lists cached fragments.  "size" counts bytes held.
+ */
+typedef struct toast_cache_entry
+{
+	toast_cache_key key;		/* hash key, must be first */
+	int32		size;
+	List	   *slices;			/* list of toast_slice_entry */
+	void	   *data;
+} toast_cache_entry;
+
+/* A cached fragment covering bytes [offset, offset + length) of a value */
+typedef struct toast_slice_entry
+{
+	int32		offset;
+	int32		length;
+	void	   *data;
+} toast_slice_entry;
+
+static bool toast_cache_init(void);
+static toast_cache_entry *toast_cache_lookup(Oid toastrelid, Oid valueid);
+static void toast_cache_free_slices(toast_cache_entry *entry);
+static struct varlena *toast_cache_lookup_slice(Oid toastrelid, Oid valueid,
+												int32 offset, int32 length);
+static void toast_cache_destroy(void);
+
 /* ----------
  * detoast_external_attr -
  *
@@ -51,7 +98,26 @@ detoast_external_attr(struct varlena *attr)
 		/*
 		 * This is an external stored plain value
 		 */
+		struct varatt_external toast_pointer;
+
+		/* Must copy to access aligned fields */
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+		/* A cached copy saves the trip to the TOAST relation */
+		result = toast_cache_lookup_datum(toast_pointer.va_toastrelid,
+										  toast_pointer.va_valueid);
+		if (result)
+			return result;
+
 		result = toast_fetch_datum(attr);
+
+		/*
+		 * The cache holds raw payloads only, so a value that is still
+		 * compressed cannot be remembered here.
+		 */
+		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+			toast_cache_add_datum(toast_pointer.va_toastrelid,
+								  toast_pointer.va_valueid, result);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -117,6 +183,18 @@ detoast_attr(struct varlena *attr)
 {
 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
+		struct varlena *result;
+		struct varatt_external toast_pointer;
+
+		/* Must copy to access aligned fields */
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+		/* On a cache hit there is nothing to fetch or decompress */
+		result = toast_cache_lookup_datum(toast_pointer.va_toastrelid,
+										  toast_pointer.va_valueid);
+		if (result)
+			return result;
+
 		/*
 		 * This is an externally stored datum --- fetch it back from there
 		 */
@@ -129,6 +207,10 @@ detoast_attr(struct varlena *attr)
 			attr = toast_decompress_datum(tmp);
 			pfree(tmp);
 		}
+
+		/* Remember the fully detoasted value for later accesses */
+		toast_cache_add_datum(toast_pointer.va_toastrelid,
+							  toast_pointer.va_valueid, attr);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -210,6 +292,8 @@ detoast_attr_slice(struct varlena *attr,
 	char	   *attrdata;
 	int32		slicelimit;
 	int32		attrsize;
+	Oid			toastrelid = InvalidOid;
+	Oid			valueid = InvalidOid;
 
 	if (sliceoffset < 0)
 		elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
@@ -229,9 +313,24 @@ detoast_attr_slice(struct varlena *attr,
 
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
+		/* Remember the cache key; the compressed path below needs it too */
+		toastrelid = toast_pointer.va_toastrelid;
+		valueid = toast_pointer.va_valueid;
+
+		/* On a cache hit there is nothing to fetch or decompress */
+		result = toast_cache_lookup_slice(toastrelid, valueid,
+										  sliceoffset, slicelength);
+		if (result)
+			return result;
+
 		/* fast path for non-compressed external datums */
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
-			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+		{
+			result = toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+			toast_cache_add_slice_datum(toastrelid, valueid, result,
+										sliceoffset, slicelength);
+			return result;
+		}
 
 		/*
 		 * For compressed values, we need to fetch enough slices to decompress
@@ -296,6 +395,19 @@ detoast_attr_slice(struct varlena *attr,
 		else
 			preslice = toast_decompress_datum(tmp);
 
+		/*
+		 * Only on-disk values have a cache key.  A bounded decompression
+		 * yields a prefix of the value; otherwise we have all of it.
+		 */
+		if (OidIsValid(toastrelid))
+		{
+			if (slicelimit >= 0)
+				toast_cache_add_slice_datum(toastrelid, valueid, preslice,
+											0, slicelimit);
+			else
+				toast_cache_add_datum(toastrelid, valueid, preslice);
+		}
+
 		if (tmp != attr)
 			pfree(tmp);
 	}
@@ -644,3 +756,354 @@ toast_datum_size(Datum value)
 	}
 	return result;
 }
+
+/*
+ * toast_cache_init
+ *		Create the per-transaction TOAST cache on first use.
+ *
+ * Returns false when caching is disabled, true once the cache is usable.
+ */
+static bool
+toast_cache_init(void)
+{
+	HASHCTL		ctl;
+
+	if (!enable_toast_cache)
+		return false;
+
+	/* already initialized */
+	if (toastCache)
+		return true;
+
+	toast_cache_hits = 0;
+	toast_cache_misses = 0;
+
+	/* Both contexts are children of TopTransactionContext */
+	ToastCacheContext = AllocSetContextCreate(TopTransactionContext,
+											  "TOAST cache context",
+											  ALLOCSET_DEFAULT_SIZES);
+	ToastSliceContext = AllocSetContextCreate(TopTransactionContext,
+											  "TOAST cached slices context",
+											  ALLOCSET_DEFAULT_SIZES);
+
+	/* Make a new hash table for the cache */
+	ctl.keysize = sizeof(toast_cache_key);
+	ctl.entrysize = sizeof(toast_cache_entry);
+	ctl.hcxt = ToastCacheContext;
+
+	toastCache = hash_create("TOAST cache",
+							 128, &ctl,
+							 HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
+
+	return true;
+}
+
+/*
+ * toast_cache_lookup
+ *		Find the cache entry for a value, counting the hit or miss.
+ *
+ * Returns NULL when caching is disabled or the value is not cached.
+ */
+static toast_cache_entry *
+toast_cache_lookup(Oid toastrelid, Oid valueid)
+{
+	toast_cache_key key;
+	toast_cache_entry *entry;
+
+	if (!toast_cache_init())
+		return NULL;
+
+	key.toastrelid = toastrelid;
+	key.valueid = valueid;
+
+	entry = (toast_cache_entry *) hash_search(toastCache, &key,
+											  HASH_FIND, NULL);
+	if (entry)
+		toast_cache_hits++;
+	else
+		toast_cache_misses++;
+
+	return entry;
+}
+
+/*
+ * toast_cache_free_slices
+ *		Release every cached fragment of an entry.
+ */
+static void
+toast_cache_free_slices(toast_cache_entry *entry)
+{
+	ListCell   *lc;
+
+	foreach(lc, entry->slices)
+	{
+		toast_slice_entry *slice = (toast_slice_entry *) lfirst(lc);
+
+		pfree(slice->data);
+		pfree(slice);
+	}
+	list_free(entry->slices);
+	entry->slices = NIL;
+}
+
+/*
+ * toast_cache_lookup_datum
+ *		Return a palloc'd copy of a fully cached value, or NULL.
+ *
+ * Entries that only hold slices do not satisfy this lookup.
+ */
+struct varlena *
+toast_cache_lookup_datum(Oid toastrelid, Oid valueid)
+{
+	toast_cache_entry *entry;
+	struct varlena *result;
+
+	entry = toast_cache_lookup(toastrelid, valueid);
+	if (entry == NULL || entry->data == NULL)
+		return NULL;
+
+	result = (struct varlena *) palloc(entry->size + VARHDRSZ);
+	SET_VARSIZE(result, entry->size + VARHDRSZ);
+	memcpy(VARDATA(result), entry->data, entry->size);
+
+	return result;
+}
+
+/*
+ * toast_cache_lookup_slice
+ *		Return a palloc'd copy of the requested slice if it is cached.
+ *
+ * A negative length means "everything from offset to the end".  When the
+ * whole value is cached the request is clamped to the value's size, so that
+ * we never read past the cached bytes.  A cached fragment can only answer
+ * a request of known length that it covers completely.
+ */
+static struct varlena *
+toast_cache_lookup_slice(Oid toastrelid, Oid valueid,
+						 int32 offset, int32 length)
+{
+	toast_cache_entry *entry;
+	const char *src = NULL;
+	struct varlena *result;
+	ListCell   *lc;
+
+	entry = toast_cache_lookup(toastrelid, valueid);
+	if (entry == NULL)
+		return NULL;
+
+	if (entry->data != NULL)
+	{
+		/* Whole value available: clamp so we never read past its end */
+		if (offset >= entry->size)
+		{
+			offset = 0;
+			length = 0;
+		}
+		else if (length < 0 || length > entry->size - offset)
+			length = entry->size - offset;
+
+		src = (const char *) entry->data + offset;
+	}
+	else if (length >= 0)
+	{
+		foreach(lc, entry->slices)
+		{
+			toast_slice_entry *slice = (toast_slice_entry *) lfirst(lc);
+
+			/* written to avoid overflowing offset + length */
+			if (offset >= slice->offset &&
+				offset - slice->offset <= slice->length &&
+				length <= slice->length - (offset - slice->offset))
+			{
+				src = (const char *) slice->data + (offset - slice->offset);
+				break;
+			}
+		}
+	}
+
+	if (src == NULL)
+		return NULL;
+
+	result = (struct varlena *) palloc(length + VARHDRSZ);
+	SET_VARSIZE(result, length + VARHDRSZ);
+	memcpy(VARDATA(result), src, length);
+
+	return result;
+}
+
+/*
+ * toast_cache_add_datum
+ *		Cache the complete payload of a detoasted value.
+ *
+ * "attr" must be a plain (not external, not compressed) varlena.  Whatever
+ * was cached for the value before, full copy or slices, is released first.
+ */
+void
+toast_cache_add_datum(Oid toastrelid, Oid valueid, struct varlena *attr)
+{
+	toast_cache_key key;
+	toast_cache_entry *entry;
+	bool		found;
+
+	if (!toast_cache_init())
+		return;
+
+	Assert(!VARATT_IS_EXTERNAL(attr) && !VARATT_IS_COMPRESSED(attr));
+
+	key.toastrelid = toastrelid;
+	key.valueid = valueid;
+
+	/* The table is palloc-based, so HASH_ENTER errors out rather than fail */
+	entry = (toast_cache_entry *) hash_search(toastCache, &key,
+											  HASH_ENTER, &found);
+	if (found)
+	{
+		toast_cache_free_slices(entry);
+		if (entry->data != NULL)
+			pfree(entry->data);
+	}
+
+	/* Keep the entry consistent should the allocation below fail */
+	entry->slices = NIL;
+	entry->data = NULL;
+	entry->size = VARSIZE_ANY_EXHDR(attr);
+	entry->data = MemoryContextAlloc(ToastCacheContext, entry->size);
+	memcpy(entry->data, VARDATA_ANY(attr), entry->size);
+}
+
+/*
+ * toast_cache_add_slice_datum
+ *		Cache a fragment of a value, starting at byte "offset".
+ *
+ * "attr" holds the fragment's bytes.  "length" is what the caller asked for
+ * and may be negative ("to the end") or exceed what was actually returned,
+ * so the number of bytes stored is bounded by the payload size of "attr".
+ * Nothing is stored if the value, or a fragment covering this one, is
+ * already cached.
+ */
+void
+toast_cache_add_slice_datum(Oid toastrelid, Oid valueid, struct varlena *attr,
+							int32 offset, int32 length)
+{
+	toast_cache_key key;
+	toast_cache_entry *entry;
+	toast_slice_entry *slice;
+	MemoryContext oldcxt;
+	ListCell   *lc;
+	int32		nbytes;
+	bool		found;
+
+	if (!toast_cache_init())
+		return;
+
+	Assert(!VARATT_IS_EXTERNAL(attr) && !VARATT_IS_COMPRESSED(attr));
+
+	nbytes = VARSIZE_ANY_EXHDR(attr);
+	if (length >= 0 && length < nbytes)
+		nbytes = length;
+	if (offset < 0 || nbytes <= 0)
+		return;
+
+	key.toastrelid = toastrelid;
+	key.valueid = valueid;
+
+	/* The table is palloc-based, so HASH_ENTER errors out rather than fail */
+	entry = (toast_cache_entry *) hash_search(toastCache, &key,
+											  HASH_ENTER, &found);
+	if (!found)
+	{
+		entry->size = 0;
+		entry->slices = NIL;
+		entry->data = NULL;
+	}
+	else if (entry->data != NULL)
+		return;					/* whole value is cached already */
+
+	foreach(lc, entry->slices)
+	{
+		toast_slice_entry *cached = (toast_slice_entry *) lfirst(lc);
+
+		if (offset >= cached->offset &&
+			offset - cached->offset <= cached->length &&
+			nbytes <= cached->length - (offset - cached->offset))
+			return;				/* covered by an existing fragment */
+	}
+
+	/* The list cells must live as long as the fragments themselves */
+	oldcxt = MemoryContextSwitchTo(ToastSliceContext);
+
+	slice = (toast_slice_entry *) palloc(sizeof(toast_slice_entry));
+	slice->offset = offset;
+	slice->length = nbytes;
+	slice->data = palloc(nbytes);
+	memcpy(slice->data, VARDATA_ANY(attr), nbytes);
+
+	entry->slices = lappend(entry->slices, slice);
+	entry->size += nbytes;
+
+	MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * toast_cache_remove_datum
+ *		Forget whatever is cached for a value that is being deleted.
+ *
+ * This works on an existing cache even if enable_toast_cache has been turned
+ * off in the meantime, so that no stale entry survives the deletion.
+ */
+void
+toast_cache_remove_datum(Oid toastrelid, Oid valueid)
+{
+	toast_cache_key key;
+	toast_cache_entry *entry;
+
+	if (toastCache == NULL)
+		return;
+
+	key.toastrelid = toastrelid;
+	key.valueid = valueid;
+
+	/* Release the payload first; the entry is gone once it is removed */
+	entry = (toast_cache_entry *) hash_search(toastCache, &key,
+											  HASH_FIND, NULL);
+	if (entry == NULL)
+		return;
+
+	toast_cache_free_slices(entry);
+	if (entry->data != NULL)
+		pfree(entry->data);
+
+	(void) hash_search(toastCache, &key, HASH_REMOVE, NULL);
+}
+
+/*
+ * toast_cache_destroy
+ *		Report the hit statistics and free the whole cache.
+ */
+static void
+toast_cache_destroy(void)
+{
+	if (!toastCache)
+		return;
+
+	elog(LOG, "AtEOXact_ToastCache hits " UINT64_FORMAT " misses " UINT64_FORMAT " ratio %.2f%%",
+		 toast_cache_hits, toast_cache_misses,
+		 (toast_cache_hits * 100.0) / Max(1, toast_cache_hits + toast_cache_misses));
+
+	/* The hash table lives in ToastCacheContext and goes away with it */
+	MemoryContextDelete(ToastCacheContext);
+	MemoryContextDelete(ToastSliceContext);
+
+	toastCache = NULL;
+	ToastCacheContext = NULL;
+	ToastSliceContext = NULL;
+}
+
+/*
+ * AtEOXact_ToastCache
+ *		End-of-transaction hook: discard the cache, if one was created.
+ */
+void
+AtEOXact_ToastCache(void)
+{
+	toast_cache_destroy();
+}
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index 3bcde2ca1b..c93800a61b 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -260,6 +260,38 @@ toast_tuple_externalize(ToastTupleContext *ttc, int attribute, int options)
 	Datum		old_value = *value;
 	ToastAttrInfo *attr = &ttc->ttc_attr[attribute];
 
+	/*
+	 * Caching every value moved out of line could exhaust memory during mass
+	 * insertions, so the new value is cached only if the value it replaces
+	 * is already in the cache, which we take as a sign that it is in use.
+	 *
+	 * XXX the entry is keyed by the old value's TOAST pointer; confirm that
+	 * this is the key later lookups of the new value will use.
+	 */
+	if (attr->tai_oldexternal != NULL &&
+		VARATT_IS_EXTERNAL_ONDISK(attr->tai_oldexternal))
+	{
+		struct varlena *new_value = (struct varlena *) DatumGetPointer(old_value);
+
+		/* the cache stores raw payloads only */
+		if (!VARATT_IS_EXTERNAL(new_value) && !VARATT_IS_COMPRESSED(new_value))
+		{
+			struct varatt_external toast_pointer;
+			struct varlena *cached;
+
+			VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr->tai_oldexternal);
+
+			cached = toast_cache_lookup_datum(toast_pointer.va_toastrelid,
+											  toast_pointer.va_valueid);
+			if (cached != NULL)
+			{
+				pfree(cached);
+				toast_cache_add_datum(toast_pointer.va_toastrelid,
+									  toast_pointer.va_valueid, new_value);
+			}
+		}
+	}
+
 	attr->tai_colflags |= TOASTCOL_IGNORE;
 	*value = toast_save_datum(ttc->ttc_rel, old_value, attr->tai_oldexternal,
 							  options);
@@ -306,7 +338,22 @@ toast_tuple_cleanup(ToastTupleContext *ttc)
 			ToastAttrInfo *attr = &ttc->ttc_attr[i];
 
 			if ((attr->tai_colflags & TOASTCOL_NEEDS_DELETE_OLD) != 0)
-				toast_delete_datum(ttc->ttc_rel, ttc->ttc_oldvalues[i], false);
+			{
+				struct varlena *oldval;
+
+				/* the on-disk value is going away, so must any cached copy */
+				oldval = (struct varlena *) DatumGetPointer(ttc->ttc_oldvalues[i]);
+				if (VARATT_IS_EXTERNAL_ONDISK(oldval))
+				{
+					struct varatt_external toast_pointer;
+
+					VARATT_EXTERNAL_GET_POINTER(toast_pointer, oldval);
+					toast_cache_remove_datum(toast_pointer.va_toastrelid,
+											 toast_pointer.va_valueid);
+				}
+
+				toast_delete_datum(ttc->ttc_rel, ttc->ttc_oldvalues[i], false);
+			}
 		}
 	}
 }
@@ -332,7 +379,16 @@ toast_delete_external(Relation rel, const Datum *values, const bool *isnull,
 			if (isnull[i])
 				continue;
 			else if (VARATT_IS_EXTERNAL_ONDISK(value))
-				toast_delete_datum(rel, value, is_speculative);
+			{
+				struct varatt_external toast_pointer;
+
+				/* the on-disk value is going away, so must any cached copy */
+				VARATT_EXTERNAL_GET_POINTER(toast_pointer, DatumGetPointer(value));
+				toast_cache_remove_datum(toast_pointer.va_toastrelid,
+										 toast_pointer.va_valueid);
+
+				toast_delete_datum(rel, value, is_speculative);
+			}
 		}
 	}
 }
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 464858117e..f00e3b63a1 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -21,6 +21,7 @@
 #include <unistd.h>
 
 #include "access/commit_ts.h"
+#include "access/detoast.h"
 #include "access/multixact.h"
 #include "access/parallel.h"
 #include "access/subtrans.h"
@@ -2373,6 +2374,7 @@ CommitTransaction(void)
 	AtEOXact_Snapshot(true, false);
 	AtEOXact_ApplyLauncher(true);
 	AtEOXact_LogicalRepWorkers(true);
+	AtEOXact_ToastCache();
 	pgstat_report_xact_timestamp(0);
 
 	ResourceOwnerDelete(TopTransactionResourceOwner);
@@ -2659,6 +2661,7 @@ PrepareTransaction(void)
 	/* we treat PREPARE as ROLLBACK so far as waking workers goes */
 	AtEOXact_ApplyLauncher(false);
 	AtEOXact_LogicalRepWorkers(false);
+	AtEOXact_ToastCache();
 	pgstat_report_xact_timestamp(0);
 
 	CurrentResourceOwner = NULL;
@@ -2872,6 +2875,7 @@ AbortTransaction(void)
 		AtEOXact_PgStat(false, is_parallel_worker);
 		AtEOXact_ApplyLauncher(false);
 		AtEOXact_LogicalRepWorkers(false);
+		AtEOXact_ToastCache();
 		pgstat_report_xact_timestamp(0);
 	}
 
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 7fe58518d7..1bf0325b78 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -27,6 +27,7 @@
 #endif
 
 #include "access/commit_ts.h"
+#include "access/detoast.h"
 #include "access/gin.h"
 #include "access/toast_compression.h"
 #include "access/twophase.h"
@@ -1060,6 +1061,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_toast_cache", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables caching of detoasted values."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_toast_cache,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h
index 12d8cdb356..832f0aa1ca 100644
--- a/src/include/access/detoast.h
+++ b/src/include/access/detoast.h
@@ -79,4 +79,17 @@ extern Size toast_raw_datum_size(Datum value);
  */
 extern Size toast_datum_size(Datum value);
 
+/* GUC: enables the transaction-lifetime cache of detoasted values */
+extern PGDLLIMPORT bool enable_toast_cache;
+
+/* TOAST cache interface, implemented in detoast.c */
+extern void AtEOXact_ToastCache(void);
+extern struct varlena *toast_cache_lookup_datum(Oid toastrelid, Oid valueid);
+extern void toast_cache_add_datum(Oid toastrelid, Oid valueid,
+								  struct varlena *attr);
+extern void toast_cache_add_slice_datum(Oid toastrelid, Oid valueid,
+										struct varlena *attr,
+										int32 offset, int32 length);
+extern void toast_cache_remove_datum(Oid toastrelid, Oid valueid);
+
 #endif							/* DETOAST_H */
-- 
2.25.1