diff options
author | Anton Altaparmakov <aia21@cantab.net> | 2005-06-25 09:27:27 -0400 |
---|---|---|
committer | Anton Altaparmakov <aia21@cantab.net> | 2005-06-25 09:27:27 -0400 |
commit | 38b22b6e9f46ab8f73ef5734f0e0a000766a9258 (patch) | |
tree | 2ccc41ef55918d3af43e444bde7648562a031559 /lib | |
parent | 3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (diff) | |
parent | b3e112bcc19abd8e9657dca34a87316786e096f3 (diff) |
Automerge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 29 | ||||
-rw-r--r-- | lib/Makefile | 4 | ||||
-rw-r--r-- | lib/textsearch.c | 317 | ||||
-rw-r--r-- | lib/ts_fsm.c | 338 | ||||
-rw-r--r-- | lib/ts_kmp.c | 145 |
5 files changed, 832 insertions, 1 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 2d4d4e3bc4..455833a9e3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -63,5 +63,32 @@ config REED_SOLOMON_ENC16 | |||
63 | config REED_SOLOMON_DEC16 | 63 | config REED_SOLOMON_DEC16 |
64 | boolean | 64 | boolean |
65 | 65 | ||
66 | endmenu | 66 | config TEXTSEARCH |
67 | boolean "Textsearch infrastructure" | ||
68 | default y | ||
69 | help | ||
70 | Say Y here if you want to provide a textsearch infrastructure | ||
71 | to other subsystems. | ||
72 | |||
73 | config TEXTSEARCH_KMP | ||
74 | depends on TEXTSEARCH | ||
75 | tristate "Knuth-Morris-Pratt" | ||
76 | help | ||
77 | Say Y here if you want to be able to search text using the | ||
78 | Knuth-Morris-Pratt textsearch algorithm. | ||
67 | 79 | ||
80 | To compile this code as a module, choose M here: the | ||
81 | module will be called ts_kmp. | ||
82 | |||
83 | config TEXTSEARCH_FSM | ||
84 | depends on TEXTSEARCH | ||
85 | tristate "Finite state machine" | ||
86 | help | ||
87 | Say Y here if you want to be able to search text using a | ||
88 | naive finite state machine approach implementing a subset | ||
89 | of regular expressions. | ||
90 | |||
91 | To compile this code as a module, choose M here: the | ||
92 | module will be called ts_fsm. | ||
93 | |||
94 | endmenu | ||
diff --git a/lib/Makefile b/lib/Makefile index dcb4231916..beed158529 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -36,6 +36,10 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ | |||
36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | 36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ |
37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
38 | 38 | ||
39 | obj-$(CONFIG_TEXTSEARCH) += textsearch.o | ||
40 | obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o | ||
41 | obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o | ||
42 | |||
39 | hostprogs-y := gen_crc32table | 43 | hostprogs-y := gen_crc32table |
40 | clean-files := crc32table.h | 44 | clean-files := crc32table.h |
41 | 45 | ||
diff --git a/lib/textsearch.c b/lib/textsearch.c new file mode 100644 index 0000000000..1e934c196f --- /dev/null +++ b/lib/textsearch.c | |||
@@ -0,0 +1,317 @@ | |||
1 | /* | ||
2 | * lib/textsearch.c Generic text search interface | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | * Pablo Neira Ayuso <pablo@eurodev.net> | ||
11 | * | ||
12 | * ========================================================================== | ||
13 | * | ||
14 | * INTRODUCTION | ||
15 | * | ||
16 | * The textsearch infrastructure provides text searching facilities for | ||
17 | * both linear and non-linear data. Individual search algorithms are | ||
18 | * implemented in modules and chosen by the user. | ||
19 | * | ||
20 | * ARCHITECTURE | ||
21 | * | ||
22 | * User | ||
23 | * +----------------+ | ||
24 | * | finish()|<--------------(6)-----------------+ | ||
25 | * |get_next_block()|<--------------(5)---------------+ | | ||
26 | * | | Algorithm | | | ||
27 | * | | +------------------------------+ | ||
28 | * | | | init() find() destroy() | | ||
29 | * | | +------------------------------+ | ||
30 | * | | Core API ^ ^ ^ | ||
31 | * | | +---------------+ (2) (4) (8) | ||
32 | * | (1)|----->| prepare() |---+ | | | ||
33 | * | (3)|----->| find()/next() |-----------+ | | ||
34 | * | (7)|----->| destroy() |----------------------+ | ||
35 | * +----------------+ +---------------+ | ||
36 | * | ||
37 | * (1) User configures a search by calling _prepare() specifying the | ||
38 | * search parameters such as the pattern and algorithm name. | ||
39 | * (2) Core requests the algorithm to allocate and initialize a search | ||
40 | * configuration according to the specified parameters. | ||
41 | * (3) User starts the search(es) by calling _find() or _next() to | ||
42 | * fetch subsequent occurrences. A state variable is provided | ||
43 | * to the algorithm to store persistent variables. | ||
44 | * (4) Core eventually resets the search offset and forwards the find() | ||
45 | * request to the algorithm. | ||
46 | * (5) Algorithm calls get_next_block() provided by the user continuously | ||
47 | * to fetch the data to be searched in block by block. | ||
48 | * (6) Algorithm invokes finish() after the last call to get_next_block | ||
49 | * to clean up any leftovers from get_next_block. (Optional) | ||
50 | * (7) User destroys the configuration by calling _destroy(). | ||
51 | * (8) Core notifies the algorithm to destroy algorithm specific | ||
52 | * allocations. (Optional) | ||
53 | * | ||
54 | * USAGE | ||
55 | * | ||
56 | * Before a search can be performed, a configuration must be created | ||
57 | * by calling textsearch_prepare() specifying the searching algorithm and | ||
58 | * the pattern to look for. The returned configuration may then be used | ||
59 | * for an arbitrary number of times and even in parallel as long as a | ||
60 | * separate struct ts_state variable is provided to every instance. | ||
61 | * | ||
62 | * The actual search is performed by either calling textsearch_find_- | ||
63 | * continuous() for linear data or by providing an own get_next_block() | ||
64 | * implementation and calling textsearch_find(). Both functions return | ||
65 | * the position of the first occurrence of the pattern or UINT_MAX if | ||
66 | * no match was found. Subsequent occurrences can be found by calling | ||
67 | * textsearch_next() regardless of the linearity of the data. | ||
68 | * | ||
69 | * Once you're done using a configuration it must be given back via | ||
70 | * textsearch_destroy. | ||
71 | * | ||
72 | * EXAMPLE | ||
73 | * | ||
74 | * int pos; | ||
75 | * struct ts_config *conf; | ||
76 | * struct ts_state state; | ||
77 | * const char *pattern = "chicken"; | ||
78 | * const char *example = "We dance the funky chicken"; | ||
79 | * | ||
80 | * conf = textsearch_prepare("kmp", pattern, strlen(pattern), | ||
81 | * GFP_KERNEL, TS_AUTOLOAD); | ||
82 | * if (IS_ERR(conf)) { | ||
83 | * err = PTR_ERR(conf); | ||
84 | * goto errout; | ||
85 | * } | ||
86 | * | ||
87 | * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); | ||
88 | * if (pos != UINT_MAX) | ||
89 | * panic("Oh my god, dancing chickens at %d\n", pos); | ||
90 | * | ||
91 | * textsearch_destroy(conf); | ||
92 | * | ||
93 | * ========================================================================== | ||
94 | */ | ||
95 | |||
96 | #include <linux/config.h> | ||
97 | #include <linux/module.h> | ||
98 | #include <linux/types.h> | ||
99 | #include <linux/string.h> | ||
100 | #include <linux/init.h> | ||
101 | #include <linux/rcupdate.h> | ||
102 | #include <linux/err.h> | ||
103 | #include <linux/textsearch.h> | ||
104 | |||
105 | static LIST_HEAD(ts_ops); | ||
106 | static DEFINE_SPINLOCK(ts_mod_lock); | ||
107 | |||
108 | static inline struct ts_ops *lookup_ts_algo(const char *name) | ||
109 | { | ||
110 | struct ts_ops *o; | ||
111 | |||
112 | rcu_read_lock(); | ||
113 | list_for_each_entry_rcu(o, &ts_ops, list) { | ||
114 | if (!strcmp(name, o->name)) { | ||
115 | if (!try_module_get(o->owner)) | ||
116 | o = NULL; | ||
117 | rcu_read_unlock(); | ||
118 | return o; | ||
119 | } | ||
120 | } | ||
121 | rcu_read_unlock(); | ||
122 | |||
123 | return NULL; | ||
124 | } | ||
125 | |||
126 | /** | ||
127 | * textsearch_register - register a textsearch module | ||
128 | * @ops: operations lookup table | ||
129 | * | ||
130 | * This function must be called by textsearch modules to announce | ||
131 | * their presence. The specified &@ops must have %name set to a | ||
132 | * unique identifier and the callbacks find(), init(), get_pattern(), | ||
133 | * and get_pattern_len() must be implemented. | ||
134 | * | ||
135 | * Returns 0 or -EEXIST if another module has already registered | ||
136 | * with the same name. | ||
137 | */ | ||
138 | int textsearch_register(struct ts_ops *ops) | ||
139 | { | ||
140 | int err = -EEXIST; | ||
141 | struct ts_ops *o; | ||
142 | |||
143 | if (ops->name == NULL || ops->find == NULL || ops->init == NULL || | ||
144 | ops->get_pattern == NULL || ops->get_pattern_len == NULL) | ||
145 | return -EINVAL; | ||
146 | |||
147 | spin_lock(&ts_mod_lock); | ||
148 | list_for_each_entry(o, &ts_ops, list) { | ||
149 | if (!strcmp(ops->name, o->name)) | ||
150 | goto errout; | ||
151 | } | ||
152 | |||
153 | list_add_tail_rcu(&ops->list, &ts_ops); | ||
154 | err = 0; | ||
155 | errout: | ||
156 | spin_unlock(&ts_mod_lock); | ||
157 | return err; | ||
158 | } | ||
159 | |||
160 | /** | ||
161 | * textsearch_unregister - unregister a textsearch module | ||
162 | * @ops: operations lookup table | ||
163 | * | ||
164 | * This function must be called by textsearch modules to announce | ||
165 | * their disappearance, for example when the module gets unloaded. | ||
166 | * The &ops parameter must be the same as the one during the | ||
167 | * registration. | ||
168 | * | ||
169 | * Returns 0 on success or -ENOENT if no matching textsearch | ||
170 | * registration was found. | ||
171 | */ | ||
172 | int textsearch_unregister(struct ts_ops *ops) | ||
173 | { | ||
174 | int err = 0; | ||
175 | struct ts_ops *o; | ||
176 | |||
177 | spin_lock(&ts_mod_lock); | ||
178 | list_for_each_entry(o, &ts_ops, list) { | ||
179 | if (o == ops) { | ||
180 | list_del_rcu(&o->list); | ||
181 | goto out; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | err = -ENOENT; | ||
186 | out: | ||
187 | spin_unlock(&ts_mod_lock); | ||
188 | return err; | ||
189 | } | ||
190 | |||
191 | struct ts_linear_state | ||
192 | { | ||
193 | unsigned int len; | ||
194 | const void *data; | ||
195 | }; | ||
196 | |||
197 | static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, | ||
198 | struct ts_config *conf, | ||
199 | struct ts_state *state) | ||
200 | { | ||
201 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
202 | |||
203 | if (likely(consumed < st->len)) { | ||
204 | *dst = st->data + consumed; | ||
205 | return st->len - consumed; | ||
206 | } | ||
207 | |||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | /** | ||
212 | * textsearch_find_continuous - search a pattern in continuous/linear data | ||
213 | * @conf: search configuration | ||
214 | * @state: search state | ||
215 | * @data: data to search in | ||
216 | * @len: length of data | ||
217 | * | ||
218 | * A simplified version of textsearch_find() for continuous/linear data. | ||
219 | * Call textsearch_next() to retrieve subsequent matches. | ||
220 | * | ||
221 | * Returns the position of first occurrence of the pattern or | ||
222 | * UINT_MAX if no occurrence was found. | ||
223 | */ | ||
224 | unsigned int textsearch_find_continuous(struct ts_config *conf, | ||
225 | struct ts_state *state, | ||
226 | const void *data, unsigned int len) | ||
227 | { | ||
228 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
229 | |||
230 | conf->get_next_block = get_linear_data; | ||
231 | st->data = data; | ||
232 | st->len = len; | ||
233 | |||
234 | return textsearch_find(conf, state); | ||
235 | } | ||
236 | |||
237 | /** | ||
238 | * textsearch_prepare - Prepare a search | ||
239 | * @algo: name of search algorithm | ||
240 | * @pattern: pattern data | ||
241 | * @len: length of pattern | ||
242 | * @gfp_mask: allocation mask | ||
243 | * @flags: search flags | ||
244 | * | ||
245 | * Looks up the search algorithm module and creates a new textsearch | ||
246 | * configuration for the specified pattern. Upon completion all | ||
247 | * necessary refcnts are held and the configuration must be released | ||
248 | * using textsearch_destroy() after usage. | ||
249 | * | ||
250 | * Note: The format of the pattern may not be compatible between | ||
251 | * the various search algorithms. | ||
252 | * | ||
253 | * Returns a new textsearch configuration according to the specified | ||
254 | * parameters or an ERR_PTR(). | ||
255 | */ | ||
256 | struct ts_config *textsearch_prepare(const char *algo, const void *pattern, | ||
257 | unsigned int len, int gfp_mask, int flags) | ||
258 | { | ||
259 | int err = -ENOENT; | ||
260 | struct ts_config *conf; | ||
261 | struct ts_ops *ops; | ||
262 | |||
263 | ops = lookup_ts_algo(algo); | ||
264 | #ifdef CONFIG_KMOD | ||
265 | /* | ||
266 | * Why not always autoload you may ask. Some users are | ||
267 | * in a situation where requesting a module may deadlock, | ||
268 | * especially when the module is located on a NFS mount. | ||
269 | */ | ||
270 | if (ops == NULL && flags & TS_AUTOLOAD) { | ||
271 | request_module("ts_%s", algo); | ||
272 | ops = lookup_ts_algo(algo); | ||
273 | } | ||
274 | #endif | ||
275 | |||
276 | if (ops == NULL) | ||
277 | goto errout; | ||
278 | |||
279 | conf = ops->init(pattern, len, gfp_mask); | ||
280 | if (IS_ERR(conf)) { | ||
281 | err = PTR_ERR(conf); | ||
282 | goto errout; | ||
283 | } | ||
284 | |||
285 | conf->ops = ops; | ||
286 | return conf; | ||
287 | |||
288 | errout: | ||
289 | if (ops) | ||
290 | module_put(ops->owner); | ||
291 | |||
292 | return ERR_PTR(err); | ||
293 | } | ||
294 | |||
295 | /** | ||
296 | * textsearch_destroy - destroy a search configuration | ||
297 | * @conf: search configuration | ||
298 | * | ||
299 | * Releases all references of the configuration and frees | ||
300 | * up the memory. | ||
301 | */ | ||
302 | void textsearch_destroy(struct ts_config *conf) | ||
303 | { | ||
304 | if (conf->ops) { | ||
305 | if (conf->ops->destroy) | ||
306 | conf->ops->destroy(conf); | ||
307 | module_put(conf->ops->owner); | ||
308 | } | ||
309 | |||
310 | kfree(conf); | ||
311 | } | ||
312 | |||
313 | EXPORT_SYMBOL(textsearch_register); | ||
314 | EXPORT_SYMBOL(textsearch_unregister); | ||
315 | EXPORT_SYMBOL(textsearch_prepare); | ||
316 | EXPORT_SYMBOL(textsearch_find_continuous); | ||
317 | EXPORT_SYMBOL(textsearch_destroy); | ||
diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c new file mode 100644 index 0000000000..d27c0a0729 --- /dev/null +++ b/lib/ts_fsm.c | |||
@@ -0,0 +1,338 @@ | |||
1 | /* | ||
2 | * lib/ts_fsm.c A naive finite state machine text search approach | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | * | ||
11 | * ========================================================================== | ||
12 | * | ||
13 | * A finite state machine consists of n states (struct ts_fsm_token) | ||
14 | * representing the pattern as a finite automaton. The data is read | ||
15 | * sequentially on an octet basis. Every state token specifies the number | ||
16 | * of recurrences and the type of value accepted which can be either a | ||
17 | * specific character or ctype based set of characters. The available | ||
18 | * type of recurrences include 1, (0|1), [0 n], and [1 n]. | ||
19 | * | ||
20 | * The algorithm differs between strict/non-strict mode specifying | ||
21 | * whether the pattern has to start at the first octet. Strict mode | ||
22 | * is enabled by default and can be disabled by inserting | ||
23 | * TS_FSM_HEAD_IGNORE as the first token in the chain. | ||
24 | * | ||
25 | * The runtime performance of the algorithm should be around O(n), | ||
26 | * however while in strict mode the average runtime can be better. | ||
27 | */ | ||
28 | |||
29 | #include <linux/config.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/types.h> | ||
32 | #include <linux/string.h> | ||
33 | #include <linux/ctype.h> | ||
34 | #include <linux/textsearch.h> | ||
35 | #include <linux/textsearch_fsm.h> | ||
36 | |||
37 | struct ts_fsm | ||
38 | { | ||
39 | unsigned int ntokens; | ||
40 | struct ts_fsm_token tokens[0]; | ||
41 | }; | ||
42 | |||
43 | /* other values derived from ctype.h */ | ||
44 | #define _A 0x100 /* ascii */ | ||
45 | #define _W 0x200 /* wildcard */ | ||
46 | |||
47 | /* Map to _ctype flags and some magic numbers */ | ||
48 | static u16 token_map[TS_FSM_TYPE_MAX+1] = { | ||
49 | [TS_FSM_SPECIFIC] = 0, | ||
50 | [TS_FSM_WILDCARD] = _W, | ||
51 | [TS_FSM_CNTRL] = _C, | ||
52 | [TS_FSM_LOWER] = _L, | ||
53 | [TS_FSM_UPPER] = _U, | ||
54 | [TS_FSM_PUNCT] = _P, | ||
55 | [TS_FSM_SPACE] = _S, | ||
56 | [TS_FSM_DIGIT] = _D, | ||
57 | [TS_FSM_XDIGIT] = _D | _X, | ||
58 | [TS_FSM_ALPHA] = _U | _L, | ||
59 | [TS_FSM_ALNUM] = _U | _L | _D, | ||
60 | [TS_FSM_PRINT] = _P | _U | _L | _D | _SP, | ||
61 | [TS_FSM_GRAPH] = _P | _U | _L | _D, | ||
62 | [TS_FSM_ASCII] = _A, | ||
63 | }; | ||
64 | |||
65 | static u16 token_lookup_tbl[256] = { | ||
66 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */ | ||
67 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */ | ||
68 | _W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */ | ||
69 | _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */ | ||
70 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */ | ||
71 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */ | ||
72 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */ | ||
73 | _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */ | ||
74 | _W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */ | ||
75 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */ | ||
76 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */ | ||
77 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */ | ||
78 | _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */ | ||
79 | _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */ | ||
80 | _W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */ | ||
81 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */ | ||
82 | _W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */ | ||
83 | _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */ | ||
84 | _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */ | ||
85 | _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */ | ||
86 | _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */ | ||
87 | _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */ | ||
88 | _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */ | ||
89 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */ | ||
90 | _W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */ | ||
91 | _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */ | ||
92 | _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */ | ||
93 | _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */ | ||
94 | _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */ | ||
95 | _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */ | ||
96 | _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */ | ||
97 | _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */ | ||
98 | _W, _W, _W, _W, /* 128-131 */ | ||
99 | _W, _W, _W, _W, /* 132-135 */ | ||
100 | _W, _W, _W, _W, /* 136-139 */ | ||
101 | _W, _W, _W, _W, /* 140-143 */ | ||
102 | _W, _W, _W, _W, /* 144-147 */ | ||
103 | _W, _W, _W, _W, /* 148-151 */ | ||
104 | _W, _W, _W, _W, /* 152-155 */ | ||
105 | _W, _W, _W, _W, /* 156-159 */ | ||
106 | _W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */ | ||
107 | _W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */ | ||
108 | _W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */ | ||
109 | _W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */ | ||
110 | _W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */ | ||
111 | _W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */ | ||
112 | _W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */ | ||
113 | _W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */ | ||
114 | _W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */ | ||
115 | _W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */ | ||
116 | _W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */ | ||
117 | _W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */ | ||
118 | _W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */ | ||
119 | _W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */ | ||
120 | _W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */ | ||
121 | _W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */ | ||
122 | _W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */ | ||
123 | _W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */ | ||
124 | _W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */ | ||
125 | _W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */ | ||
126 | _W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */ | ||
127 | _W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */ | ||
128 | _W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */ | ||
129 | _W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */ | ||
130 | |||
131 | static inline int match_token(struct ts_fsm_token *t, u8 d) | ||
132 | { | ||
133 | if (t->type) | ||
134 | return (token_lookup_tbl[d] & t->type) != 0; | ||
135 | else | ||
136 | return t->value == d; | ||
137 | } | ||
138 | |||
139 | static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state) | ||
140 | { | ||
141 | struct ts_fsm *fsm = ts_config_priv(conf); | ||
142 | struct ts_fsm_token *cur = NULL, *next; | ||
143 | unsigned int match_start, block_idx = 0, tok_idx; | ||
144 | unsigned block_len = 0, strict, consumed = state->offset; | ||
145 | const u8 *data; | ||
146 | |||
147 | #define GET_NEXT_BLOCK() \ | ||
148 | ({ consumed += block_idx; \ | ||
149 | block_idx = 0; \ | ||
150 | block_len = conf->get_next_block(consumed, &data, conf, state); }) | ||
151 | |||
152 | #define TOKEN_MISMATCH() \ | ||
153 | do { \ | ||
154 | if (strict) \ | ||
155 | goto no_match; \ | ||
156 | block_idx++; \ | ||
157 | goto startover; \ | ||
158 | } while(0) | ||
159 | |||
160 | #define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK()) | ||
161 | |||
162 | if (end_of_data()) | ||
163 | goto no_match; | ||
164 | |||
165 | strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE; | ||
166 | |||
167 | startover: | ||
168 | match_start = consumed + block_idx; | ||
169 | |||
170 | for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) { | ||
171 | cur = &fsm->tokens[tok_idx]; | ||
172 | |||
173 | if (likely(tok_idx < (fsm->ntokens - 1))) | ||
174 | next = &fsm->tokens[tok_idx + 1]; | ||
175 | else | ||
176 | next = NULL; | ||
177 | |||
178 | switch (cur->recur) { | ||
179 | case TS_FSM_SINGLE: | ||
180 | if (end_of_data()) | ||
181 | goto no_match; | ||
182 | |||
183 | if (!match_token(cur, data[block_idx])) | ||
184 | TOKEN_MISMATCH(); | ||
185 | break; | ||
186 | |||
187 | case TS_FSM_PERHAPS: | ||
188 | if (end_of_data() || | ||
189 | !match_token(cur, data[block_idx])) | ||
190 | continue; | ||
191 | break; | ||
192 | |||
193 | case TS_FSM_MULTI: | ||
194 | if (end_of_data()) | ||
195 | goto no_match; | ||
196 | |||
197 | if (!match_token(cur, data[block_idx])) | ||
198 | TOKEN_MISMATCH(); | ||
199 | |||
200 | block_idx++; | ||
201 | /* fall through */ | ||
202 | |||
203 | case TS_FSM_ANY: | ||
204 | if (next == NULL) | ||
205 | goto found_match; | ||
206 | |||
207 | if (end_of_data()) | ||
208 | continue; | ||
209 | |||
210 | while (!match_token(next, data[block_idx])) { | ||
211 | if (!match_token(cur, data[block_idx])) | ||
212 | TOKEN_MISMATCH(); | ||
213 | block_idx++; | ||
214 | if (end_of_data()) | ||
215 | goto no_match; | ||
216 | } | ||
217 | continue; | ||
218 | |||
219 | /* | ||
220 | * Optimization: Prefer small local loop over jumping | ||
221 | * back and forth until garbage at head is munched. | ||
222 | */ | ||
223 | case TS_FSM_HEAD_IGNORE: | ||
224 | if (end_of_data()) | ||
225 | continue; | ||
226 | |||
227 | while (!match_token(next, data[block_idx])) { | ||
228 | /* | ||
229 | * Special case, don't start over upon | ||
230 | * a mismatch, give the user the | ||
231 | * chance to specify the type of data | ||
232 | * allowed to be ignored. | ||
233 | */ | ||
234 | if (!match_token(cur, data[block_idx])) | ||
235 | goto no_match; | ||
236 | |||
237 | block_idx++; | ||
238 | if (end_of_data()) | ||
239 | goto no_match; | ||
240 | } | ||
241 | |||
242 | match_start = consumed + block_idx; | ||
243 | continue; | ||
244 | } | ||
245 | |||
246 | block_idx++; | ||
247 | } | ||
248 | |||
249 | if (end_of_data()) | ||
250 | goto found_match; | ||
251 | |||
252 | no_match: | ||
253 | return UINT_MAX; | ||
254 | |||
255 | found_match: | ||
256 | state->offset = consumed + block_idx; | ||
257 | return match_start; | ||
258 | } | ||
259 | |||
260 | static struct ts_config *fsm_init(const void *pattern, unsigned int len, | ||
261 | int gfp_mask) | ||
262 | { | ||
263 | int i, err = -EINVAL; | ||
264 | struct ts_config *conf; | ||
265 | struct ts_fsm *fsm; | ||
266 | struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern; | ||
267 | unsigned int ntokens = len / sizeof(*tokens); | ||
268 | size_t priv_size = sizeof(*fsm) + len; | ||
269 | |||
270 | if (len % sizeof(struct ts_fsm_token) || ntokens < 1) | ||
271 | goto errout; | ||
272 | |||
273 | for (i = 0; i < ntokens; i++) { | ||
274 | struct ts_fsm_token *t = &tokens[i]; | ||
275 | |||
276 | if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX) | ||
277 | goto errout; | ||
278 | |||
279 | if (t->recur == TS_FSM_HEAD_IGNORE && | ||
280 | (i != 0 || i == (ntokens - 1))) | ||
281 | goto errout; | ||
282 | } | ||
283 | |||
284 | conf = alloc_ts_config(priv_size, gfp_mask); | ||
285 | if (IS_ERR(conf)) | ||
286 | return conf; | ||
287 | |||
288 | fsm = ts_config_priv(conf); | ||
289 | fsm->ntokens = ntokens; | ||
290 | memcpy(fsm->tokens, pattern, len); | ||
291 | |||
292 | for (i = 0; i < fsm->ntokens; i++) { | ||
293 | struct ts_fsm_token *t = &fsm->tokens[i]; | ||
294 | t->type = token_map[t->type]; | ||
295 | } | ||
296 | |||
297 | return conf; | ||
298 | |||
299 | errout: | ||
300 | return ERR_PTR(err); | ||
301 | } | ||
302 | |||
303 | static void *fsm_get_pattern(struct ts_config *conf) | ||
304 | { | ||
305 | struct ts_fsm *fsm = ts_config_priv(conf); | ||
306 | return fsm->tokens; | ||
307 | } | ||
308 | |||
309 | static unsigned int fsm_get_pattern_len(struct ts_config *conf) | ||
310 | { | ||
311 | struct ts_fsm *fsm = ts_config_priv(conf); | ||
312 | return fsm->ntokens * sizeof(struct ts_fsm_token); | ||
313 | } | ||
314 | |||
315 | static struct ts_ops fsm_ops = { | ||
316 | .name = "fsm", | ||
317 | .find = fsm_find, | ||
318 | .init = fsm_init, | ||
319 | .get_pattern = fsm_get_pattern, | ||
320 | .get_pattern_len = fsm_get_pattern_len, | ||
321 | .owner = THIS_MODULE, | ||
322 | .list = LIST_HEAD_INIT(fsm_ops.list) | ||
323 | }; | ||
324 | |||
325 | static int __init init_fsm(void) | ||
326 | { | ||
327 | return textsearch_register(&fsm_ops); | ||
328 | } | ||
329 | |||
330 | static void __exit exit_fsm(void) | ||
331 | { | ||
332 | textsearch_unregister(&fsm_ops); | ||
333 | } | ||
334 | |||
335 | MODULE_LICENSE("GPL"); | ||
336 | |||
337 | module_init(init_fsm); | ||
338 | module_exit(exit_fsm); | ||
diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c new file mode 100644 index 0000000000..73266b9755 --- /dev/null +++ b/lib/ts_kmp.c | |||
@@ -0,0 +1,145 @@ | |||
1 | /* | ||
2 | * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | * | ||
11 | * ========================================================================== | ||
12 | * | ||
13 | * Implements a linear-time string-matching algorithm due to Knuth, | ||
14 | * Morris, and Pratt [1]. Their algorithm avoids the explicit | ||
15 | * computation of the transition function DELTA altogether. Its | ||
16 | * matching time is O(n), for n being length(text), using just an | ||
17 | * auxiliary function PI[1..m], for m being length(pattern), | ||
18 | * precomputed from the pattern in time O(m). The array PI allows | ||
19 | * the transition function DELTA to be computed efficiently | ||
20 | * "on the fly" as needed. Roughly speaking, for any state | ||
21 | * "q" = 0,1,...,m and any character "a" in SIGMA, the value | ||
22 | * PI["q"] contains the information that is independent of "a" and | ||
23 | * is needed to compute DELTA("q", "a") [2]. Since the array PI | ||
24 | * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we | ||
25 | * save a factor of |SIGMA| in the preprocessing time by computing | ||
26 | * PI rather than DELTA. | ||
27 | * | ||
28 | * [1] Cormen, Leiserson, Rivest, Stein | ||
29 | * Introduction to Algorithms, 2nd Edition, MIT Press | ||
30 | * [2] See finite automaton theory | ||
31 | */ | ||
32 | |||
33 | #include <linux/config.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/types.h> | ||
36 | #include <linux/string.h> | ||
37 | #include <linux/textsearch.h> | ||
38 | |||
39 | struct ts_kmp | ||
40 | { | ||
41 | u8 * pattern; | ||
42 | unsigned int pattern_len; | ||
43 | unsigned int prefix_tbl[0]; | ||
44 | }; | ||
45 | |||
46 | static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) | ||
47 | { | ||
48 | struct ts_kmp *kmp = ts_config_priv(conf); | ||
49 | unsigned int i, q = 0, text_len, consumed = state->offset; | ||
50 | const u8 *text; | ||
51 | |||
52 | for (;;) { | ||
53 | text_len = conf->get_next_block(consumed, &text, conf, state); | ||
54 | |||
55 | if (unlikely(text_len == 0)) | ||
56 | break; | ||
57 | |||
58 | for (i = 0; i < text_len; i++) { | ||
59 | while (q > 0 && kmp->pattern[q] != text[i]) | ||
60 | q = kmp->prefix_tbl[q - 1]; | ||
61 | if (kmp->pattern[q] == text[i]) | ||
62 | q++; | ||
63 | if (unlikely(q == kmp->pattern_len)) { | ||
64 | state->offset = consumed + i + 1; | ||
65 | return state->offset - kmp->pattern_len; | ||
66 | } | ||
67 | } | ||
68 | |||
69 | consumed += text_len; | ||
70 | } | ||
71 | |||
72 | return UINT_MAX; | ||
73 | } | ||
74 | |||
75 | static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, | ||
76 | unsigned int *prefix_tbl) | ||
77 | { | ||
78 | unsigned int k, q; | ||
79 | |||
80 | for (k = 0, q = 1; q < len; q++) { | ||
81 | while (k > 0 && pattern[k] != pattern[q]) | ||
82 | k = prefix_tbl[k-1]; | ||
83 | if (pattern[k] == pattern[q]) | ||
84 | k++; | ||
85 | prefix_tbl[q] = k; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | static struct ts_config *kmp_init(const void *pattern, unsigned int len, | ||
90 | int gfp_mask) | ||
91 | { | ||
92 | struct ts_config *conf; | ||
93 | struct ts_kmp *kmp; | ||
94 | unsigned int prefix_tbl_len = len * sizeof(unsigned int); | ||
95 | size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; | ||
96 | |||
97 | conf = alloc_ts_config(priv_size, gfp_mask); | ||
98 | if (IS_ERR(conf)) | ||
99 | return conf; | ||
100 | |||
101 | kmp = ts_config_priv(conf); | ||
102 | kmp->pattern_len = len; | ||
103 | compute_prefix_tbl(pattern, len, kmp->prefix_tbl); | ||
104 | kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; | ||
105 | memcpy(kmp->pattern, pattern, len); | ||
106 | |||
107 | return conf; | ||
108 | } | ||
109 | |||
110 | static void *kmp_get_pattern(struct ts_config *conf) | ||
111 | { | ||
112 | struct ts_kmp *kmp = ts_config_priv(conf); | ||
113 | return kmp->pattern; | ||
114 | } | ||
115 | |||
116 | static unsigned int kmp_get_pattern_len(struct ts_config *conf) | ||
117 | { | ||
118 | struct ts_kmp *kmp = ts_config_priv(conf); | ||
119 | return kmp->pattern_len; | ||
120 | } | ||
121 | |||
122 | static struct ts_ops kmp_ops = { | ||
123 | .name = "kmp", | ||
124 | .find = kmp_find, | ||
125 | .init = kmp_init, | ||
126 | .get_pattern = kmp_get_pattern, | ||
127 | .get_pattern_len = kmp_get_pattern_len, | ||
128 | .owner = THIS_MODULE, | ||
129 | .list = LIST_HEAD_INIT(kmp_ops.list) | ||
130 | }; | ||
131 | |||
132 | static int __init init_kmp(void) | ||
133 | { | ||
134 | return textsearch_register(&kmp_ops); | ||
135 | } | ||
136 | |||
137 | static void __exit exit_kmp(void) | ||
138 | { | ||
139 | textsearch_unregister(&kmp_ops); | ||
140 | } | ||
141 | |||
142 | MODULE_LICENSE("GPL"); | ||
143 | |||
144 | module_init(init_kmp); | ||
145 | module_exit(exit_kmp); | ||