diff options
author | Pablo Neira Ayuso <pablo@netfilter.org> | 2006-02-02 20:15:41 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2006-02-02 20:15:41 -0500 |
commit | 3f330317ab4973178423aba750d6d0ca5ce0024a (patch) | |
tree | 177b1a3f213410bc526fda0cdb1b0568ea282d82 | |
parent | f00c401b9b5f0a90e2eb05705f5988fbda0b082b (diff) |
[TEXTSEARCH]: Fix broken good shift array calculation in Boyer-Moore
The current logic does not calculate correctly the good shift array:
Let x be the pattern that is being searched. Let y be the block of data.
The good shift array aligns the segment:
x[i+1 ... m-1] = y[i+j+1 ... j+m-1]
with its rightmost occurrence in x that fulfils x[i] neq y[i+j].
In previous version, the good shift array for the pattern ANPANMAN is:
[1, 8, 3, 8, 8, 8, 8, 8]
and should be:
[1, 8, 3, 6, 6, 6, 6, 6]
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | lib/ts_bm.c | 40 |
1 files changed, 25 insertions, 15 deletions
diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 8a8b3a16133e..c4c1ac5fbd1a 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c | |||
@@ -94,10 +94,28 @@ next: bs = bm->bad_shift[text[shift-i]]; | |||
94 | return UINT_MAX; | 94 | return UINT_MAX; |
95 | } | 95 | } |
96 | 96 | ||
97 | static int subpattern(u8 *pattern, int i, int j, int g) | ||
98 | { | ||
99 | int x = i+g-1, y = j+g-1, ret = 0; | ||
100 | |||
101 | while(pattern[x--] == pattern[y--]) { | ||
102 | if (y < 0) { | ||
103 | ret = 1; | ||
104 | break; | ||
105 | } | ||
106 | if (--g == 0) { | ||
107 | ret = pattern[i-1] != pattern[j-1]; | ||
108 | break; | ||
109 | } | ||
110 | } | ||
111 | |||
112 | return ret; | ||
113 | } | ||
114 | |||
97 | static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, | 115 | static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, |
98 | unsigned int len) | 116 | unsigned int len) |
99 | { | 117 | { |
100 | int i, j, ended, l[ASIZE]; | 118 | int i, j, g; |
101 | 119 | ||
102 | for (i = 0; i < ASIZE; i++) | 120 | for (i = 0; i < ASIZE; i++) |
103 | bm->bad_shift[i] = len; | 121 | bm->bad_shift[i] = len; |
@@ -106,23 +124,15 @@ static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern, | |||
106 | 124 | ||
107 | /* Compute the good shift array, used to match reocurrences | 125 | /* Compute the good shift array, used to match reocurrences |
108 | * of a subpattern */ | 126 | * of a subpattern */ |
109 | for (i = 1; i < bm->patlen; i++) { | ||
110 | for (j = 0; j < bm->patlen && bm->pattern[bm->patlen - 1 - j] | ||
111 | == bm->pattern[bm->patlen - 1 - i - j]; j++); | ||
112 | l[i] = j; | ||
113 | } | ||
114 | |||
115 | bm->good_shift[0] = 1; | 127 | bm->good_shift[0] = 1; |
116 | for (i = 1; i < bm->patlen; i++) | 128 | for (i = 1; i < bm->patlen; i++) |
117 | bm->good_shift[i] = bm->patlen; | 129 | bm->good_shift[i] = bm->patlen; |
118 | for (i = bm->patlen - 1; i > 0; i--) | 130 | for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { |
119 | bm->good_shift[l[i]] = i; | 131 | for (j = i-1; j >= 1-g ; j--) |
120 | ended = 0; | 132 | if (subpattern(bm->pattern, i, j, g)) { |
121 | for (i = 0; i < bm->patlen; i++) { | 133 | bm->good_shift[g] = bm->patlen-j-g; |
122 | if (l[i] == bm->patlen - 1 - i) | 134 | break; |
123 | ended = i; | 135 | } |
124 | if (ended) | ||
125 | bm->good_shift[i] = ended; | ||
126 | } | 136 | } |
127 | } | 137 | } |
128 | 138 | ||