From 3f330317ab4973178423aba750d6d0ca5ce0024a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 2 Feb 2006 17:15:41 -0800 Subject: [TEXTSEARCH]: Fix broken good shift array calculation in Boyer-Moore The current logic does not calculate correctly the good shift array: Let x be the pattern that is being searched. Let y be the block of data. The good shift array aligns the segment: x[i+1 ... m-1] = y[i+j+1 ... j+m-1] with its rightmost occurrence in x that fulfils x[i] neq y[i+j]. In previous version, the good shift array for the pattern ANPANMAN is: [1, 8, 3, 8, 8, 8, 8, 8] and should be: [1, 8, 3, 6, 6, 6, 6, 6] Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- lib/ts_bm.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'lib/ts_bm.c') diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 8a8b3a16133e..c4c1ac5fbd1a 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -94,10 +94,28 @@ next: bs = bm->bad_shift[text[shift-i]]; return UINT_MAX; } +static int subpattern(u8 *pattern, int i, int j, int g) +{ + int x = i+g-1, y = j+g-1, ret = 0; + + while(pattern[x--] == pattern[y--]) { + if (y < 0) { + ret = 1; + break; + } + if (--g == 0) { + ret = pattern[i-1] != pattern[j-1]; + break; + } + } + + return ret; +} + static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, unsigned int len) { - int i, j, ended, l[ASIZE]; + int i, j, g; for (i = 0; i < ASIZE; i++) bm->bad_shift[i] = len; @@ -106,23 +124,15 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, /* Compute the good shift array, used to match reocurrences * of a subpattern */ - for (i = 1; i < bm->patlen; i++) { - for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j] - == bm->pattern[bm->patlen - 1 - i - j]; j++); - l[i] = j; - } - bm->good_shift[0] = 1; for (i = 1; i < bm->patlen; i++) bm->good_shift[i] = bm->patlen; - for (i = bm->patlen - 1; i > 0; i--) - bm->good_shift[l[i]] = i; - ended = 0; - for (i = 0; i < bm->patlen; i++) { - if (l[i] == bm->patlen - 1 - i) - ended = i; - if (ended) - bm->good_shift[i] = ended; + for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { + for (j = i-1; j >= 1-g ; j--) + if (subpattern(bm->pattern, i, j, g)) { + bm->good_shift[g] = bm->patlen-j-g; + break; + } } } -- cgit v1.2.2