Yet more tsearch refactoring - Mailing list pgsql-patches
From | Heikki Linnakangas |
---|---|
Subject | Yet more tsearch refactoring |
Date | |
Msg-id | 46E57082.3060406@enterprisedb.com Whole thread Raw |
Responses |
Re: Yet more tsearch refactoring
|
List | pgsql-patches |
* Defined new struct WordEntryPosVector that holds a uint16 length and a variable-size array of WordEntryPos elements. This replaces the previous convention of a variable-size uint16 array, with the first element holding the length. WordEntryPosVector has the same layout in memory, but is more readable in source code. The POSDATAPTR and POSDATALEN macros are still used, though it would now be more readable to access the fields in WordEntryPosVector directly. * Removed needfree field from DocRepresentation. It was always set to false. * Miscellaneous other commenting and refactoring -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com Index: src/backend/utils/adt/tsginidx.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/tsginidx.c,v retrieving revision 1.3 diff -c -r1.3 tsginidx.c *** src/backend/utils/adt/tsginidx.c 7 Sep 2007 16:03:40 -0000 1.3 --- src/backend/utils/adt/tsginidx.c 10 Sep 2007 12:04:14 -0000 *************** *** 25,37 **** int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; ! *nentries = 0; if (vector->size > 0) { int i; WordEntry *we = ARRPTR(vector); - *nentries = (uint32) vector->size; entries = (Datum *) palloc(sizeof(Datum) * vector->size); for (i = 0; i < vector->size; i++) --- 25,36 ---- int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; ! *nentries = vector->size; if (vector->size > 0) { int i; WordEntry *we = ARRPTR(vector); entries = (Datum *) palloc(sizeof(Datum) * vector->size); for (i = 0; i < vector->size; i++) *************** *** 134,144 **** if (query->size > 0) { ! int4 i, j = 0; QueryItem *item; GinChkVal gcv; gcv.frst = item = GETQUERY(query); gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); --- 133,151 ---- if (query->size > 0) { ! int i, j = 0; QueryItem *item; GinChkVal gcv; + /* + * check-parameter array has one entry for each value (operand) in the + * query. 
We expand that array into mapped_check, so that there's one + * entry in mapped_check for every node in the query, including + * operators, to allow quick lookups in checkcondition_gin. Only the + * entries corresponding operands are actually used. + */ + gcv.frst = item = GETQUERY(query); gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); Index: src/backend/utils/adt/tsgistidx.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/tsgistidx.c,v retrieving revision 1.3 diff -c -r1.3 tsgistidx.c *** src/backend/utils/adt/tsgistidx.c 7 Sep 2007 15:09:56 -0000 1.3 --- src/backend/utils/adt/tsgistidx.c 10 Sep 2007 15:56:09 -0000 *************** *** 133,152 **** } static int ! compareint(const void *a, const void *b) { ! if (*((int4 *) a) == *((int4 *) b)) return 0; ! return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1; } static int uniqueint(int4 *a, int4 l) { int4 *ptr, *res; ! if (l == 1) return l; ptr = res = a; --- 133,159 ---- } static int ! compareint(const void *va, const void *vb) { ! int4 a = *((int4 *) va); ! int4 b = *((int4 *) vb); ! ! if (a == b) return 0; ! return (a > b) ? 1 : -1; } + /* + * Removes duplicates from an array of int4. 'l' is + * size of the input array. Returns the new size of the array. + */ static int uniqueint(int4 *a, int4 l) { int4 *ptr, *res; ! if (l <= 1) return l; ptr = res = a; *************** *** 570,581 **** } SPLITCOST; static int ! comparecost(const void *a, const void *b) { ! if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) return 0; else ! return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1; } --- 577,591 ---- } SPLITCOST; static int ! comparecost(const void *va, const void *vb) { ! SPLITCOST *a = (SPLITCOST *) va; ! SPLITCOST *b = (SPLITCOST *) vb; ! ! if (a->cost == b->cost) return 0; else ! return (a->cost > b->cost) ? 
1 : -1; } Index: src/backend/utils/adt/tsrank.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/tsrank.c,v retrieving revision 1.4 diff -c -r1.4 tsrank.c *** src/backend/utils/adt/tsrank.c 7 Sep 2007 16:03:40 -0000 1.4 --- src/backend/utils/adt/tsrank.c 10 Sep 2007 15:56:51 -0000 *************** *** 53,74 **** { WordEntry *ptr = ARRPTR(t), *end = (WordEntry *) STRPTR(t); ! int len = 0, ! clen; while (ptr < end) { ! if ((clen = POSDATALEN(t, ptr)) == 0) len += 1; else len += clen; ptr++; } return len; } ! static int4 WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item) { if (ptr->len == item->length) --- 53,76 ---- { WordEntry *ptr = ARRPTR(t), *end = (WordEntry *) STRPTR(t); ! int len = 0; while (ptr < end) { ! int clen = POSDATALEN(t, ptr); ! ! if (clen == 0) len += 1; else len += clen; + ptr++; } return len; } ! static int WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item) { if (ptr->len == item->length) *************** *** 80,85 **** --- 82,91 ---- return (ptr->len > item->length) ? 1 : -1; } + /* + * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q' + * is the TSQuery containing 'item'. Returns NULL if not found. + */ static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item) { *************** *** 178,192 **** } /* A dummy WordEntryPos array to use when haspos is false */ ! static WordEntryPos POSNULL[] = { 1, /* Number of elements that follow */ ! 0 }; static float calc_rank_and(float *w, TSVector t, TSQuery q) { ! uint16 **pos; int i, k, l, --- 184,198 ---- } /* A dummy WordEntryPos array to use when haspos is false */ ! static WordEntryPosVector POSNULL = { 1, /* Number of elements that follow */ ! { 0 } }; static float calc_rank_and(float *w, TSVector t, TSQuery q) { ! 
WordEntryPosVector **pos; int i, k, l, *************** *** 207,215 **** pfree(item); return calc_rank_or(w, t, q); } ! pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); ! memset(pos, 0, sizeof(uint16 *) * q->size); ! WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1); for (i = 0; i < size; i++) { --- 213,220 ---- pfree(item); return calc_rank_or(w, t, q); } ! pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size); ! WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1); for (i = 0; i < size; i++) { *************** *** 218,242 **** continue; if (entry->haspos) ! pos[i] = (uint16 *) _POSDATAPTR(t, entry); else ! pos[i] = (uint16 *) POSNULL; ! dimt = *(uint16 *) (pos[i]); ! post = (WordEntryPos *) (pos[i] + 1); for (k = 0; k < i; k++) { if (!pos[k]) continue; ! lenct = *(uint16 *) (pos[k]); ! ct = (WordEntryPos *) (pos[k] + 1); for (l = 0; l < dimt; l++) { for (p = 0; p < lenct; p++) { dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); ! if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) { float curw; --- 223,247 ---- continue; if (entry->haspos) ! pos[i] = _POSVECPTR(t, entry); else ! pos[i] = &POSNULL; ! dimt = pos[i]->npos; ! post = pos[i]->pos; for (k = 0; k < i; k++) { if (!pos[k]) continue; ! lenct = pos[k]->npos; ! ct = pos[k]->pos; for (l = 0; l < dimt; l++) { for (p = 0; p < lenct; p++) { dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); ! if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) { float curw; *************** *** 285,292 **** } else { ! dimt = *(uint16 *) POSNULL; ! post = POSNULL + 1; } resj = 0.0; --- 290,297 ---- } else { ! dimt = POSNULL.npos; ! post = POSNULL.pos; } resj = 0.0; *************** *** 456,472 **** { QueryItem **item; int16 nitem; - bool needfree; uint8 wclass; int32 pos; } DocRepresentation; static int ! compareDocR(const void *a, const void *b) { ! if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos) return 0; ! 
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1; } static bool --- 461,479 ---- { QueryItem **item; int16 nitem; uint8 wclass; int32 pos; } DocRepresentation; static int ! compareDocR(const void *va, const void *vb) { ! DocRepresentation *a = (DocRepresentation *) va; ! DocRepresentation *b = (DocRepresentation *) vb; ! ! if (a->pos == b->pos) return 0; ! return (a->pos > b->pos) ? 1 : -1; } static bool *************** *** 547,557 **** ptr = doc + lastpos; ! /* find lower bound of cover from founded upper bound, move down */ while (ptr >= doc + ext->pos) { for (i = 0; i < ptr->nitem; i++) ! if(ptr->item[i]->type == QI_VAL) /* XXX */ ptr->item[i]->operand.istrue = 1; if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand)) { --- 554,564 ---- ptr = doc + lastpos; ! /* find lower bound of cover from found upper bound, move down */ while (ptr >= doc + ext->pos) { for (i = 0; i < ptr->nitem; i++) ! if(ptr->item[i]->type == QI_VAL) ptr->item[i]->operand.istrue = 1; if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand)) { *************** *** 620,627 **** } else { ! dimt = *(uint16 *) POSNULL; ! post = POSNULL + 1; } while (cur + dimt >= len) --- 627,634 ---- } else { ! dimt = POSNULL.npos; ! 
post = POSNULL.pos; } while (cur + dimt >= len) *************** *** 636,642 **** { int k; - doc[cur].needfree = false; doc[cur].nitem = 0; doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size); --- 643,648 ---- *************** *** 658,664 **** } else { - doc[cur].needfree = false; doc[cur].nitem = doc[cur - 1].nitem; doc[cur].item = doc[cur - 1].item; } --- 664,669 ---- *************** *** 764,772 **** if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) Wdoc /= log((double) (txt->size + 1)) / log(2.0); - for (i = 0; i < doclen; i++) - if (doc[i].needfree) - pfree(doc[i].item); pfree(doc); return (float4) Wdoc; --- 769,774 ---- Index: src/backend/utils/adt/tsvector_op.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/tsvector_op.c,v retrieving revision 1.4 diff -c -r1.4 tsvector_op.c *** src/backend/utils/adt/tsvector_op.c 7 Sep 2007 16:03:40 -0000 1.4 --- src/backend/utils/adt/tsvector_op.c 10 Sep 2007 15:55:04 -0000 *************** *** 269,275 **** static int4 add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos) { ! uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr); int i; uint16 slen = POSDATALEN(src, srcptr), startlen; --- 269,275 ---- static int4 add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos) { ! uint16 *clen = &_POSVECPTR(dest, destptr)->npos; int i; uint16 slen = POSDATALEN(src, srcptr), startlen; *************** *** 354,360 **** if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); ! memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else --- 354,360 ---- if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); ! 
memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else *************** *** 399,405 **** cur += SHORTALIGN(ptr1->len); if (ptr1->haspos) { ! memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); --- 399,405 ---- cur += SHORTALIGN(ptr1->len); if (ptr1->haspos) { ! memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); if (ptr2->haspos) cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); *************** *** 434,440 **** if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); ! memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else --- 434,440 ---- if (ptr->haspos) { cur += SHORTALIGN(ptr1->len); ! memcpy(cur, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); } else *************** *** 499,508 **** * check weight info */ static bool ! checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item) { ! WordEntryPos *ptr = (WordEntryPos *) (chkval->values + SHORTALIGN(val->pos + val->len) + sizeof(uint16)); ! uint16 len = *((uint16 *) (chkval->values + SHORTALIGN(val->pos + val->len))); while (len--) { --- 499,515 ---- * check weight info */ static bool ! checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item) { ! WordEntryPosVector *posvec; ! WordEntryPos *ptr; ! uint16 len; ! ! posvec = (WordEntryPosVector *) ! (chkval->values + SHORTALIGN(val->pos + val->len)); ! ! len = posvec->npos; ! 
ptr = posvec->pos; while (len--) { *************** *** 674,680 **** } /* ! * Statistics of tsvector */ static int check_weight(TSVector txt, WordEntry * wptr, int8 weight) --- 681,693 ---- } /* ! * ts_stat statistic function support ! */ ! ! ! /* ! * Returns the number of positions in value 'wptr' within tsvector 'txt', ! * that have a weight equal to one of the weights in 'weight' bitmask. */ static int check_weight(TSVector txt, WordEntry * wptr, int8 weight) *************** *** 824,829 **** --- 837,854 ---- return newstat; } + /* + * This is written like a custom aggregate function, because the + * original plan was to do just that. Unfortunately, an aggregate function + * can't return a set, so that plan was abandoned. If that limitation is + * lifted in the future, ts_stat could be a real aggregate function so that + * you could use it like this: + * + * SELECT ts_stat(vector_column) FROM vector_table; + * + * where vector_column is a tsvector-type column in vector_table. + */ + static tsstat * ts_accum(tsstat * stat, Datum data) { Index: src/include/tsearch/ts_type.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/tsearch/ts_type.h,v retrieving revision 1.4 diff -c -r1.4 ts_type.h *** src/include/tsearch/ts_type.h 7 Sep 2007 16:03:40 -0000 1.4 --- src/include/tsearch/ts_type.h 10 Sep 2007 14:47:24 -0000 *************** *** 43,48 **** --- 43,55 ---- typedef uint16 WordEntryPos; + typedef struct + { + uint16 npos; + WordEntryPos pos[1]; /* var length */ + } WordEntryPosVector; + + #define WEP_GETWEIGHT(x) ( (x) >> 14 ) #define WEP_GETPOS(x) ( (x) & 0x3fff ) *************** *** 88,96 **** /* returns a pointer to the beginning of lexemes */ #define STRPTR(x) ( (char *) &(x)->entries[x->size] ) ! #define _POSDATAPTR(x,e) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)) ! #define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) ! 
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) ) /* * fmgr interface macros --- 95,103 ---- /* returns a pointer to the beginning of lexemes */ #define STRPTR(x) ( (char *) &(x)->entries[x->size] ) ! #define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))) ! #define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 ) ! #define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos) /* * fmgr interface macros
pgsql-patches by date: