Bunch of tsearch fixes and cleanup - Mailing list pgsql-patches
From | Heikki Linnakangas |
---|---|
Subject | Bunch of tsearch fixes and cleanup |
Date | |
Msg-id | 46CD9713.6080107@enterprisedb.com Whole thread Raw |
Responses |
Re: Bunch of tsearch fixes and cleanup
|
List | pgsql-patches |
Fixes the following bugs: - ispell initialization crashed on empty dictionary file - ispell initialization crashed on affix file with prefixes but no suffixes - stop words file was run through pg_verifymbstr, which checks against the database encoding, but it's later interpreted as being UTF-8. Now it is verified as UTF-8 with pg_verify_mbstr, regardless of database encoding. Other changes: - readstoplist now sorts the stop words after loading them. Removed the separate sortstoplist function. - readstoplist calls recode_and_lowerstr directly, instead of using the "wordop" function pointer in the StopList struct. All callers used recode_and_lowerstr anyway, so this simplifies the code a little bit. Are there any external dictionary implementations that would require different behavior? - bunch of comments added, typos fixed, and other cleanup The code still needs lots of love, but it's a start... -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com Index: src/backend/snowball/dict_snowball.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/snowball/dict_snowball.c,v retrieving revision 1.2 diff -c -r1.2 dict_snowball.c *** src/backend/snowball/dict_snowball.c 22 Aug 2007 01:39:44 -0000 1.2 --- src/backend/snowball/dict_snowball.c 23 Aug 2007 10:55:53 -0000 *************** *** 192,198 **** ListCell *l; d = (DictSnowball *) palloc0(sizeof(DictSnowball)); - d->stoplist.wordop = recode_and_lowerstr; foreach(l, dictoptions) { --- 192,197 ---- *************** *** 205,211 **** (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); readstoplist(defGetString(defel), &d->stoplist); - sortstoplist(&d->stoplist); stoploaded = true; } else if (pg_strcasecmp("Language", defel->defname) == 0) --- 204,209 ---- Index: src/backend/tsearch/dict_ispell.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/dict_ispell.c,v 
retrieving revision 1.2 diff -c -r1.2 dict_ispell.c *** src/backend/tsearch/dict_ispell.c 22 Aug 2007 01:39:44 -0000 1.2 --- src/backend/tsearch/dict_ispell.c 23 Aug 2007 10:57:12 -0000 *************** *** 39,45 **** ListCell *l; d = (DictISpell *) palloc0(sizeof(DictISpell)); - d->stoplist.wordop = recode_and_lowerstr; foreach(l, dictoptions) { --- 39,44 ---- *************** *** 74,80 **** (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); readstoplist(defGetString(defel), &(d->stoplist)); - sortstoplist(&(d->stoplist)); stoploaded = true; } else --- 73,78 ---- Index: src/backend/tsearch/dict_simple.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/dict_simple.c,v retrieving revision 1.2 diff -c -r1.2 dict_simple.c *** src/backend/tsearch/dict_simple.c 22 Aug 2007 01:39:44 -0000 1.2 --- src/backend/tsearch/dict_simple.c 23 Aug 2007 11:15:55 -0000 *************** *** 23,41 **** typedef struct { StopList stoplist; ! } DictExample; Datum dsimple_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); ! DictExample *d = (DictExample *) palloc0(sizeof(DictExample)); bool stoploaded = false; ListCell *l; - d->stoplist.wordop = recode_and_lowerstr; - foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); --- 23,39 ---- typedef struct { StopList stoplist; ! } DictSimple; Datum dsimple_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); ! 
DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple)); bool stoploaded = false; ListCell *l; foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); *************** *** 47,53 **** (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); readstoplist(defGetString(defel), &d->stoplist); - sortstoplist(&d->stoplist); stoploaded = true; } else --- 45,50 ---- *************** *** 65,80 **** Datum dsimple_lexize(PG_FUNCTION_ARGS) { ! DictExample *d = (DictExample *) PG_GETARG_POINTER(0); char *in = (char *) PG_GETARG_POINTER(1); int32 len = PG_GETARG_INT32(2); ! char *txt = lowerstr_with_len(in, len); TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) - { pfree(txt); - } else res[0].lexeme = txt; --- 62,77 ---- Datum dsimple_lexize(PG_FUNCTION_ARGS) { ! DictSimple *d = (DictSimple *) PG_GETARG_POINTER(0); char *in = (char *) PG_GETARG_POINTER(1); int32 len = PG_GETARG_INT32(2); ! char *txt; TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); + txt = lowerstr_with_len(in, len); + if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) pfree(txt); else res[0].lexeme = txt; Index: src/backend/tsearch/dict_synonym.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/dict_synonym.c,v retrieving revision 1.2 diff -c -r1.2 dict_synonym.c *** src/backend/tsearch/dict_synonym.c 22 Aug 2007 04:13:15 -0000 1.2 --- src/backend/tsearch/dict_synonym.c 23 Aug 2007 13:09:47 -0000 *************** *** 31,40 **** typedef struct { ! int len; Syn *syn; } DictSyn; static char * findwrd(char *in, char **end) { --- 31,45 ---- typedef struct { ! int len; /* length of syn array */ Syn *syn; } DictSyn; + /* + * Finds the next whitespace-delimited word within the 'in' string. + * Returns a pointer to the first character of the word, and a pointer + * to the next byte after the last character in the word in *end. 
+ */ static char * findwrd(char *in, char **end) { *************** *** 137,149 **** d->syn[cur].in = recode_and_lowerstr(starti); d->syn[cur].out = recode_and_lowerstr(starto); - if (!(d->syn[cur].in && d->syn[cur].out)) - { - FreeFile(fin); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } cur++; } --- 142,147 ---- *************** *** 151,158 **** FreeFile(fin); d->len = cur; ! if (cur > 1) ! qsort(d->syn, d->len, sizeof(Syn), compareSyn); PG_RETURN_POINTER(d); } --- 149,155 ---- FreeFile(fin); d->len = cur; ! qsort(d->syn, d->len, sizeof(Syn), compareSyn); PG_RETURN_POINTER(d); } *************** *** 179,186 **** if (!found) PG_RETURN_POINTER(NULL); ! res = palloc(sizeof(TSLexeme) * 2); ! memset(res, 0, sizeof(TSLexeme) * 2); res[0].lexeme = pstrdup(found->out); PG_RETURN_POINTER(res); --- 176,182 ---- if (!found) PG_RETURN_POINTER(NULL); ! res = palloc0(sizeof(TSLexeme) * 2); res[0].lexeme = pstrdup(found->out); PG_RETURN_POINTER(res); Index: src/backend/tsearch/spell.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/spell.c,v retrieving revision 1.1 diff -c -r1.1 spell.c *** src/backend/tsearch/spell.c 21 Aug 2007 01:11:18 -0000 1.1 --- src/backend/tsearch/spell.c 23 Aug 2007 14:04:45 -0000 *************** *** 21,28 **** /* ! * during initialization dictionary requires a lot ! * of memory, so it will use temporary context */ static MemoryContext tmpCtx = NULL; --- 21,31 ---- /* ! * Initialization requires a lot of memory that's not needed ! * after the initialization is done. In init function, ! * CurrentMemoryContext is a long lived memory context associated ! * with the dictionary cache entry, so we use a temporary context ! * for the short-lived stuff. 
*/ static MemoryContext tmpCtx = NULL; *************** *** 32,37 **** --- 35,43 ---- static void checkTmpCtx(void) { + /* XXX: This assumes that CurrentMemoryContext doesn't have + * any children other than the one we create here. + */ if (CurrentMemoryContext->firstchild == NULL) { tmpCtx = AllocSetContextCreate(CurrentMemoryContext, *************** *** 78,93 **** } static char * - strnduplicate(char *s, int len) - { - char *d = (char *) palloc(len + 1); - - memcpy(d, s, len); - d[len] = '\0'; - return d; - } - - static char * findchar(char *str, int c) { while (*str) --- 84,89 ---- *************** *** 185,191 **** } Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); strcpy(Conf->Spell[Conf->nspell]->word, word); ! strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16); Conf->nspell++; } --- 181,187 ---- } Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); strcpy(Conf->Spell[Conf->nspell]->word, word); ! strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN); Conf->nspell++; } *************** *** 733,745 **** char find[BUFSIZ]; char repl[BUFSIZ]; char *s; ! int suffixes = 0; ! int prefixes = 0; int flag = 0; char flagflags = 0; FILE *affix; int line = 0; ! int oldformat = 0; checkTmpCtx(); --- 729,741 ---- char find[BUFSIZ]; char repl[BUFSIZ]; char *s; ! bool suffixes = false; ! bool prefixes = false; int flag = 0; char flagflags = 0; FILE *affix; int line = 0; ! bool oldformat = false; checkTmpCtx(); *************** *** 777,798 **** Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG; Conf->usecompound = true; } ! oldformat++; continue; } } if (STRNCMP(pstr, "suffixes") == 0) { ! suffixes = 1; ! prefixes = 0; ! oldformat++; continue; } if (STRNCMP(pstr, "prefixes") == 0) { ! suffixes = 0; ! prefixes = 1; ! oldformat++; continue; } if (STRNCMP(pstr, "flag") == 0) --- 773,794 ---- Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG; Conf->usecompound = true; } ! 
oldformat = true; continue; } } if (STRNCMP(pstr, "suffixes") == 0) { ! suffixes = true; ! prefixes = false; ! oldformat = true; continue; } if (STRNCMP(pstr, "prefixes") == 0) { ! suffixes = false; ! prefixes = true; ! oldformat = true; continue; } if (STRNCMP(pstr, "flag") == 0) *************** *** 802,808 **** while (*s && t_isspace(s)) s++; ! oldformat++; /* allow only single-encoded flags */ if (pg_mblen(s) != 1) --- 798,804 ---- while (*s && t_isspace(s)) s++; ! oldformat = true; /* allow only single-encoded flags */ if (pg_mblen(s) != 1) *************** *** 978,1012 **** void NISortDictionary(IspellDict * Conf) { ! size_t i; ! int naffix = 3; checkTmpCtx(); /* compress affixes */ qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix); ! for (i = 1; i < Conf->nspell; i++) ! if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) naffix++; Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); ! naffix = 1; ! Conf->AffixData[0] = pstrdup(""); ! Conf->AffixData[1] = pstrdup(Conf->Spell[0]->p.flag); ! Conf->Spell[0]->p.d.affix = 1; ! Conf->Spell[0]->p.d.len = strlen(Conf->Spell[0]->word); ! for (i = 1; i < Conf->nspell; i++) { ! if (strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[naffix])) { ! naffix++; ! Conf->AffixData[naffix] = pstrdup(Conf->Spell[i]->p.flag); } ! Conf->Spell[i]->p.d.affix = naffix; Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); } Conf->lenAffixData = Conf->nAffixData = naffix; qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); --- 974,1021 ---- void NISortDictionary(IspellDict * Conf) { ! int i; ! int naffix = 0; ! int curaffix; checkTmpCtx(); /* compress affixes */ + + /* Count the number of different flags used in the dictionary */ + qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix); ! ! naffix = 0; ! for (i = 0; i < Conf->nspell; i++) ! { ! 
if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN)) naffix++; + } + /* + * Fill in Conf->AffixData with the affixes that were used + * in the dictionary. Replace textual flag-field of Conf->Spell + * entries with indexes into Conf->AffixData array. + */ Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); ! ! curaffix = -1; ! for (i = 0; i < Conf->nspell; i++) { ! if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN)) { ! curaffix++; ! Assert(curaffix < naffix); ! Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag); } ! ! Conf->Spell[i]->p.d.affix = curaffix; Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); } Conf->lenAffixData = Conf->nAffixData = naffix; + qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); *************** *** 1085,1091 **** } static void ! mkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix) { int i, cnt = 0; --- 1094,1100 ---- } static void ! mkVoidAffix(IspellDict * Conf, bool issuffix, int startsuffix) { int i, cnt = 0; *************** *** 1145,1151 **** AFFIX *Affix; size_t i; CMPDAffix *ptr; ! int firstsuffix = -1; checkTmpCtx(); --- 1154,1160 ---- AFFIX *Affix; size_t i; CMPDAffix *ptr; ! int firstsuffix = Conf->naffixes; checkTmpCtx(); *************** *** 1160,1166 **** for (i = 0; i < Conf->naffixes; i++) { Affix = &(((AFFIX *) Conf->Affix)[i]); ! if (Affix->type == FF_SUFFIX && firstsuffix < 0) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && --- 1169,1175 ---- for (i = 0; i < Conf->naffixes; i++) { Affix = &(((AFFIX *) Conf->Affix)[i]); ! if (Affix->type == FF_SUFFIX && i < firstsuffix) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && *************** *** 1185,1196 **** Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); ! 
mkVoidAffix(Conf, 1, firstsuffix); ! mkVoidAffix(Conf, 0, firstsuffix); } static AffixNodeData * ! FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type) { AffixNodeData *StopLow, *StopHigh, --- 1194,1205 ---- Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); ! mkVoidAffix(Conf, true, firstsuffix); ! mkVoidAffix(Conf, false, firstsuffix); } static AffixNodeData * ! FindAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type) { AffixNodeData *StopLow, *StopHigh, *************** *** 1374,1380 **** plevel = 0; while (pnode) { ! prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) --- 1383,1389 ---- plevel = 0; while (pnode) { ! prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) *************** *** 1398,1404 **** int baselen = 0; /* find possible suffix */ ! suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ --- 1407,1413 ---- int baselen = 0; /* find possible suffix */ ! suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ *************** *** 1416,1422 **** swrdlen = strlen(newword); while (pnode) { ! prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) --- 1425,1431 ---- swrdlen = strlen(newword); while (pnode) { ! prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); if (!prefix) break; for (j = 0; j < prefix->naff; j++) *************** *** 1626,1632 **** if (wordlen == level + 1) { /* well, it was last word */ ! 
var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var; --- 1635,1641 ---- if (wordlen == level + 1) { /* well, it was last word */ ! var->stem[var->nstem] = pnstrdup(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var; *************** *** 1641,1647 **** ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); /* we can find next word */ level++; ! var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos); var->nstem++; node = Conf->Dictionary; startpos = level; --- 1650,1656 ---- ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); /* we can find next word */ level++; ! var->stem[var->nstem] = pnstrdup(word + startpos, level - startpos); var->nstem++; node = Conf->Dictionary; startpos = level; *************** *** 1656,1662 **** level++; } ! var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var; --- 1665,1671 ---- level++; } ! var->stem[var->nstem] = pnstrdup(word + startpos, wordlen - startpos); var->nstem++; pfree(notprobed); return var; Index: src/backend/tsearch/ts_parse.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/ts_parse.c,v retrieving revision 1.1 diff -c -r1.1 ts_parse.c *** src/backend/tsearch/ts_parse.c 21 Aug 2007 01:11:18 -0000 1.1 --- src/backend/tsearch/ts_parse.c 23 Aug 2007 12:29:51 -0000 *************** *** 308,314 **** { /* * Dictionary normalizes lexemes, so we remove from stack all ! * used lexemes , return to basic mode and redo end of stack * (if it exists) */ if (res) --- 308,314 ---- { /* * Dictionary normalizes lexemes, so we remove from stack all ! * used lexemes, return to basic mode and redo end of stack * (if it exists) */ if (res) *************** *** 571,577 **** } text * ! 
generatHeadline(HeadlineText * prs) { text *out; int len = 128; --- 571,577 ---- } text * ! generateHeadline(HeadlineText * prs) { text *out; int len = 128; Index: src/backend/tsearch/ts_utils.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/ts_utils.c,v retrieving revision 1.2 diff -c -r1.2 ts_utils.c *** src/backend/tsearch/ts_utils.c 22 Aug 2007 01:39:44 -0000 1.2 --- src/backend/tsearch/ts_utils.c 23 Aug 2007 12:51:06 -0000 *************** *** 63,70 **** --- 63,82 ---- return result; } + static int + comparestr(const void *a, const void *b) + { + return strcmp(*(char **) a, *(char **) b); + } + #define STOPBUFLEN 4096 + /* + * Reads a stopword file. + * + * The file must be in UTF-8 encoding, it will be converted to database + * encoding. + */ void readstoplist(char *in, StopList * s) { *************** *** 97,108 **** if (*buf == '\0') continue; ! if (!pg_verifymbstr(buf, strlen(buf), true)) { FreeFile(hin); ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("invalid multibyte encoding at line %d in file \"%s\"", line, filename))); } --- 109,120 ---- if (*buf == '\0') continue; ! if (!pg_verify_mbstr(PG_UTF8, buf, strlen(buf), true)) { FreeFile(hin); ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("invalid UTF-8 encoding at line %d in file \"%s\"", line, filename))); } *************** *** 120,130 **** } } ! ! if (s->wordop) ! stop[s->len] = s->wordop(buf); ! else ! stop[s->len] = pstrdup(buf); (s->len)++; } --- 132,138 ---- } } ! stop[s->len] = recode_and_lowerstr(buf); (s->len)++; } *************** *** 133,149 **** } s->stop = stop; - } ! static int ! comparestr(const void *a, const void *b) ! { ! return strcmp(*(char **) a, *(char **) b); ! } ! ! void ! sortstoplist(StopList * s) ! { if (s->stop && s->len > 0) qsort(s->stop, s->len, sizeof(char *), comparestr); } --- 141,148 ---- } s->stop = stop; ! 
/* Sort to allow binary searching */ if (s->stop && s->len > 0) qsort(s->stop, s->len, sizeof(char *), comparestr); } Index: src/backend/tsearch/wparser.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/wparser.c,v retrieving revision 1.2 diff -c -r1.2 wparser.c *** src/backend/tsearch/wparser.c 22 Aug 2007 01:39:45 -0000 1.2 --- src/backend/tsearch/wparser.c 23 Aug 2007 12:29:59 -0000 *************** *** 325,331 **** PointerGetDatum(prsoptions), PointerGetDatum(query)); ! out = generatHeadline(&prs); PG_FREE_IF_COPY(in, 1); PG_FREE_IF_COPY(query, 2); --- 325,331 ---- PointerGetDatum(prsoptions), PointerGetDatum(query)); ! out = generateHeadline(&prs); PG_FREE_IF_COPY(in, 1); PG_FREE_IF_COPY(query, 2); Index: src/include/tsearch/ts_public.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/tsearch/ts_public.h,v retrieving revision 1.2 diff -c -r1.2 ts_public.h *** src/include/tsearch/ts_public.h 22 Aug 2007 01:39:46 -0000 1.2 --- src/include/tsearch/ts_public.h 23 Aug 2007 10:54:50 -0000 *************** *** 71,80 **** { int len; char **stop; - char *(*wordop) (char *); } StopList; - extern void sortstoplist(StopList * s); extern void readstoplist(char *in, StopList * s); extern bool searchstoplist(StopList * s, char *key); --- 71,78 ---- Index: src/include/tsearch/ts_utils.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/tsearch/ts_utils.h,v retrieving revision 1.1 diff -c -r1.1 ts_utils.h *** src/include/tsearch/ts_utils.h 21 Aug 2007 01:11:29 -0000 1.1 --- src/include/tsearch/ts_utils.h 23 Aug 2007 12:30:32 -0000 *************** *** 102,108 **** * headline framework, flow in common to generate: * 1 parse text with hlparsetext * 2 parser-specific function to find part ! 
* 3 generatHeadline to generate result text */ typedef struct --- 102,108 ---- * headline framework, flow in common to generate: * 1 parse text with hlparsetext * 2 parser-specific function to find part ! * 3 generateHeadline to generate result text */ typedef struct *************** *** 131,137 **** extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen); ! extern text *generatHeadline(HeadlineText * prs); /* * token/node types for parsing --- 131,137 ---- extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen); ! extern text *generateHeadline(HeadlineText * prs); /* * token/node types for parsing Index: src/include/tsearch/dicts/spell.h =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/tsearch/dicts/spell.h,v retrieving revision 1.1 diff -c -r1.1 spell.h *** src/include/tsearch/dicts/spell.h 21 Aug 2007 01:11:29 -0000 1.1 --- src/include/tsearch/dicts/spell.h 23 Aug 2007 14:02:52 -0000 *************** *** 18,23 **** --- 18,29 ---- #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" + /* + * Max length of a flag name. Names longer than this will be truncated + * to the maximum. + */ + #define MAXFLAGLEN 16 + struct SPNode; typedef struct *************** *** 54,67 **** { union { ! char flag[16]; struct { int affix; int len; } d; } p; ! char word[1]; } SPELL; #define SPELLHDRSZ (offsetof(SPELL, word)) --- 60,76 ---- { union { ! /* flag is filled in by NIImportDictionary, and after NISortDictionary, ! * d is used and flag is invalid. ! */ ! char flag[MAXFLAGLEN]; struct { int affix; int len; } d; } p; ! 
char word[1]; /* variable length, null-terminated */ } SPELL; #define SPELLHDRSZ (offsetof(SPELL, word)) *************** *** 90,95 **** --- 99,109 ---- #define FF_COMPOUNDPERMITFLAG 0x10 #define FF_COMPOUNDFORBIDFLAG 0x20 #define FF_CROSSPRODUCT 0x40 + + /* + * don't change the ordering of these because it's + * taken advantage of in initialization + */ #define FF_SUFFIX 1 #define FF_PREFIX 0 *************** *** 126,134 **** int naffixes; AFFIX *Affix; ! int nspell; ! int mspell; SPELL **Spell; AffixNode *Suffix; AffixNode *Prefix; --- 140,150 ---- int naffixes; AFFIX *Affix; ! /* Temporary array of all words in the dict file. Only used during ! * initialization */ SPELL **Spell; + int nspell; /* number of entries in Spell-array */ + int mspell; /* allocated length of Spell-array */ AffixNode *Suffix; AffixNode *Prefix;
pgsql-patches by date: