diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index f7e96acc53..306d60bd3b 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -456,7 +456,7 @@ iterate_word_similarity(int *trg2indexes,
lastpos[trgindex] = i;
}
- /* Adjust lower bound if this trigram is present in required substring */
+ /* Adjust upper bound if this trigram is present in required substring */
if (found[trgindex])
{
int prev_lower,
@@ -473,7 +473,7 @@ iterate_word_similarity(int *trg2indexes,
smlr_cur = CALCSML(count, ulen1, ulen2);
- /* Also try to adjust upper bound for greater similarity */
+ /* Also try to adjust lower bound for greater similarity */
tmp_count = count;
tmp_ulen2 = ulen2;
prev_lower = lower;
diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml
index 338ef30fbc..fb5beb9272 100644
--- a/doc/src/sgml/pgtrgm.sgml
+++ b/doc/src/sgml/pgtrgm.sgml
@@ -99,12 +99,8 @@
real
- Returns a number that indicates how similar the first string
- to the most similar word of the second string. The function searches in
- the second string a most similar word not a most similar substring. The
- range of the result is zero (indicating that the two strings are
- completely dissimilar) to one (indicating that the first string is
- identical to one of the words of the second string).
+ Returns the greatest similarity between the set of trigrams in the first string
+ and any continuous extent of the ordered set of trigrams in the second string.
@@ -131,6 +127,35 @@
+
+ word_similarity(text, text) requires further
+ explanation. Consider the following example.
+
+
+# select word_similarity('word', 'two words');
+ word_similarity
+-----------------
+ 0.8
+(1 row)
+
+
+ In the first string, the set of trigrams is
+ {"  w"," wo","ord","wor","rd "}.
+ In the second string, the ordered set of trigrams is
+ {"  t"," tw","two","wo ","  w"," wo","wor","ord","rds","ds "}.
+ The most similar extent of the ordered set of trigrams in the second string is
+ {"  w"," wo","wor","ord"}, and the similarity is
+ 0.8.
+
+
+
+ This function can be approximately understood as the greatest similarity between
+ the first string and any substring of the second string. However, this function
+ does not add padding to the boundaries of the extent. This is why it scores a
+ full-word match higher than a match of a word against part of a word. This
+ peculiarity is reflected in the function, albeit rather ambiguously.
+
+
pg_trgm Operators
@@ -156,9 +181,9 @@
text <% text
boolean
- Returns true if its first argument has the similar word in
- the second argument and they have a similarity that is greater than the
- current word similarity threshold set by
+ Returns true if its second argument has a continuous
+ extent of an ordered trigram set whose similarity to the trigram set of the
+ first argument is greater than the current word similarity threshold set by
pg_trgm.word_similarity_threshold parameter.
@@ -302,8 +327,9 @@ SELECT t, word_similarity('word', t) AS sml
WHERE 'word' <% t
ORDER BY sml DESC, t;
- This will return all values in the text column that have a word
- which sufficiently similar to word, sorted from best
+ This will return all values in the text column that have a continuous extent
+ in the corresponding ordered trigram set that is sufficiently similar to the
+ trigram set of word, sorted from best
match to worst. The index will be used to make this a fast operation
even over very large data sets.