diff options
author | Thomas Graf <tgraf@suug.ch> | 2005-06-23 23:49:30 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-06-23 23:49:30 -0400 |
commit | 2de4ff7bd658c97fb357efa3095a509674dacb5a (patch) | |
tree | 49036dbf594317a6a17ff4e56f65158a6aeacbda /lib | |
parent | 5f8ef48d240963093451bcf83df89f1a1364f51d (diff) |
[LIB]: Textsearch infrastructure.
The textsearch infrastructure provides text searching
facitilies for both linear and non-linear data.
Individual search algorithms are implemented in modules
and chosen by the user.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 8 | ||||
-rw-r--r-- | lib/Makefile | 2 | ||||
-rw-r--r-- | lib/textsearch.c | 317 |
3 files changed, 326 insertions, 1 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 2d4d4e3bc4aa..5bc2d523e6d1 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
@@ -63,5 +63,11 @@ config REED_SOLOMON_ENC16 | |||
63 | config REED_SOLOMON_DEC16 | 63 | config REED_SOLOMON_DEC16 |
64 | boolean | 64 | boolean |
65 | 65 | ||
66 | endmenu | 66 | config TEXTSEARCH |
67 | boolean "Textsearch infrastructure" | ||
68 | default y | ||
69 | help | ||
70 | Say Y here if you want to provide a textsearch infrastructure | ||
71 | to other subsystems. | ||
67 | 72 | ||
73 | endmenu | ||
diff --git a/lib/Makefile b/lib/Makefile index dcb4231916e2..3e917436ad60 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
@@ -36,6 +36,8 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ | |||
36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | 36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ |
37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
38 | 38 | ||
39 | lib-$(CONFIG_TEXTSEARCH) += textsearch.o | ||
40 | |||
39 | hostprogs-y := gen_crc32table | 41 | hostprogs-y := gen_crc32table |
40 | clean-files := crc32table.h | 42 | clean-files := crc32table.h |
41 | 43 | ||
diff --git a/lib/textsearch.c b/lib/textsearch.c new file mode 100644 index 000000000000..1e934c196f0f --- /dev/null +++ b/lib/textsearch.c | |||
@@ -0,0 +1,317 @@ | |||
1 | /* | ||
2 | * lib/textsearch.c Generic text search interface | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
10 | * Pablo Neira Ayuso <pablo@eurodev.net> | ||
11 | * | ||
12 | * ========================================================================== | ||
13 | * | ||
14 | * INTRODUCTION | ||
15 | * | ||
16 | * The textsearch infrastructure provides text searching facitilies for | ||
17 | * both linear and non-linear data. Individual search algorithms are | ||
18 | * implemented in modules and chosen by the user. | ||
19 | * | ||
20 | * ARCHITECTURE | ||
21 | * | ||
22 | * User | ||
23 | * +----------------+ | ||
24 | * | finish()|<--------------(6)-----------------+ | ||
25 | * |get_next_block()|<--------------(5)---------------+ | | ||
26 | * | | Algorithm | | | ||
27 | * | | +------------------------------+ | ||
28 | * | | | init() find() destroy() | | ||
29 | * | | +------------------------------+ | ||
30 | * | | Core API ^ ^ ^ | ||
31 | * | | +---------------+ (2) (4) (8) | ||
32 | * | (1)|----->| prepare() |---+ | | | ||
33 | * | (3)|----->| find()/next() |-----------+ | | ||
34 | * | (7)|----->| destroy() |----------------------+ | ||
35 | * +----------------+ +---------------+ | ||
36 | * | ||
37 | * (1) User configures a search by calling _prepare() specifying the | ||
38 | * search parameters such as the pattern and algorithm name. | ||
39 | * (2) Core requests the algorithm to allocate and initialize a search | ||
40 | * configuration according to the specified parameters. | ||
41 | * (3) User starts the search(es) by calling _find() or _next() to | ||
42 | * fetch subsequent occurrences. A state variable is provided | ||
43 | * to the algorihtm to store persistant variables. | ||
44 | * (4) Core eventually resets the search offset and forwards the find() | ||
45 | * request to the algorithm. | ||
46 | * (5) Algorithm calls get_next_block() provided by the user continously | ||
47 | * to fetch the data to be searched in block by block. | ||
48 | * (6) Algorithm invokes finish() after the last call to get_next_block | ||
49 | * to clean up any leftovers from get_next_block. (Optional) | ||
50 | * (7) User destroys the configuration by calling _destroy(). | ||
51 | * (8) Core notifies the algorithm to destroy algorithm specific | ||
52 | * allocations. (Optional) | ||
53 | * | ||
54 | * USAGE | ||
55 | * | ||
56 | * Before a search can be performed, a configuration must be created | ||
57 | * by calling textsearch_prepare() specyfing the searching algorithm and | ||
58 | * the pattern to look for. The returned configuration may then be used | ||
59 | * for an arbitary amount of times and even in parallel as long as a | ||
60 | * separate struct ts_state variable is provided to every instance. | ||
61 | * | ||
62 | * The actual search is performed by either calling textsearch_find_- | ||
63 | * continuous() for linear data or by providing an own get_next_block() | ||
64 | * implementation and calling textsearch_find(). Both functions return | ||
65 | * the position of the first occurrence of the patern or UINT_MAX if | ||
66 | * no match was found. Subsequent occurences can be found by calling | ||
67 | * textsearch_next() regardless of the linearity of the data. | ||
68 | * | ||
69 | * Once you're done using a configuration it must be given back via | ||
70 | * textsearch_destroy. | ||
71 | * | ||
72 | * EXAMPLE | ||
73 | * | ||
74 | * int pos; | ||
75 | * struct ts_config *conf; | ||
76 | * struct ts_state state; | ||
77 | * const char *pattern = "chicken"; | ||
78 | * const char *example = "We dance the funky chicken"; | ||
79 | * | ||
80 | * conf = textsearch_prepare("kmp", pattern, strlen(pattern), | ||
81 | * GFP_KERNEL, TS_AUTOLOAD); | ||
82 | * if (IS_ERR(conf)) { | ||
83 | * err = PTR_ERR(conf); | ||
84 | * goto errout; | ||
85 | * } | ||
86 | * | ||
87 | * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); | ||
88 | * if (pos != UINT_MAX) | ||
89 | * panic("Oh my god, dancing chickens at %d\n", pos); | ||
90 | * | ||
91 | * textsearch_destroy(conf); | ||
92 | * | ||
93 | * ========================================================================== | ||
94 | */ | ||
95 | |||
96 | #include <linux/config.h> | ||
97 | #include <linux/module.h> | ||
98 | #include <linux/types.h> | ||
99 | #include <linux/string.h> | ||
100 | #include <linux/init.h> | ||
101 | #include <linux/rcupdate.h> | ||
102 | #include <linux/err.h> | ||
103 | #include <linux/textsearch.h> | ||
104 | |||
105 | static LIST_HEAD(ts_ops); | ||
106 | static DEFINE_SPINLOCK(ts_mod_lock); | ||
107 | |||
108 | static inline struct ts_ops *lookup_ts_algo(const char *name) | ||
109 | { | ||
110 | struct ts_ops *o; | ||
111 | |||
112 | rcu_read_lock(); | ||
113 | list_for_each_entry_rcu(o, &ts_ops, list) { | ||
114 | if (!strcmp(name, o->name)) { | ||
115 | if (!try_module_get(o->owner)) | ||
116 | o = NULL; | ||
117 | rcu_read_unlock(); | ||
118 | return o; | ||
119 | } | ||
120 | } | ||
121 | rcu_read_unlock(); | ||
122 | |||
123 | return NULL; | ||
124 | } | ||
125 | |||
126 | /** | ||
127 | * textsearch_register - register a textsearch module | ||
128 | * @ops: operations lookup table | ||
129 | * | ||
130 | * This function must be called by textsearch modules to announce | ||
131 | * their presence. The specified &@ops must have %name set to a | ||
132 | * unique identifier and the callbacks find(), init(), get_pattern(), | ||
133 | * and get_pattern_len() must be implemented. | ||
134 | * | ||
135 | * Returns 0 or -EEXISTS if another module has already registered | ||
136 | * with same name. | ||
137 | */ | ||
138 | int textsearch_register(struct ts_ops *ops) | ||
139 | { | ||
140 | int err = -EEXIST; | ||
141 | struct ts_ops *o; | ||
142 | |||
143 | if (ops->name == NULL || ops->find == NULL || ops->init == NULL || | ||
144 | ops->get_pattern == NULL || ops->get_pattern_len == NULL) | ||
145 | return -EINVAL; | ||
146 | |||
147 | spin_lock(&ts_mod_lock); | ||
148 | list_for_each_entry(o, &ts_ops, list) { | ||
149 | if (!strcmp(ops->name, o->name)) | ||
150 | goto errout; | ||
151 | } | ||
152 | |||
153 | list_add_tail_rcu(&ops->list, &ts_ops); | ||
154 | err = 0; | ||
155 | errout: | ||
156 | spin_unlock(&ts_mod_lock); | ||
157 | return err; | ||
158 | } | ||
159 | |||
160 | /** | ||
161 | * textsearch_unregister - unregister a textsearch module | ||
162 | * @ops: operations lookup table | ||
163 | * | ||
164 | * This function must be called by textsearch modules to announce | ||
165 | * their disappearance for examples when the module gets unloaded. | ||
166 | * The &ops parameter must be the same as the one during the | ||
167 | * registration. | ||
168 | * | ||
169 | * Returns 0 on success or -ENOENT if no matching textsearch | ||
170 | * registration was found. | ||
171 | */ | ||
172 | int textsearch_unregister(struct ts_ops *ops) | ||
173 | { | ||
174 | int err = 0; | ||
175 | struct ts_ops *o; | ||
176 | |||
177 | spin_lock(&ts_mod_lock); | ||
178 | list_for_each_entry(o, &ts_ops, list) { | ||
179 | if (o == ops) { | ||
180 | list_del_rcu(&o->list); | ||
181 | goto out; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | err = -ENOENT; | ||
186 | out: | ||
187 | spin_unlock(&ts_mod_lock); | ||
188 | return err; | ||
189 | } | ||
190 | |||
191 | struct ts_linear_state | ||
192 | { | ||
193 | unsigned int len; | ||
194 | const void *data; | ||
195 | }; | ||
196 | |||
197 | static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, | ||
198 | struct ts_config *conf, | ||
199 | struct ts_state *state) | ||
200 | { | ||
201 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
202 | |||
203 | if (likely(consumed < st->len)) { | ||
204 | *dst = st->data + consumed; | ||
205 | return st->len - consumed; | ||
206 | } | ||
207 | |||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | /** | ||
212 | * textsearch_find_continuous - search a pattern in continuous/linear data | ||
213 | * @conf: search configuration | ||
214 | * @state: search state | ||
215 | * @data: data to search in | ||
216 | * @len: length of data | ||
217 | * | ||
218 | * A simplified version of textsearch_find() for continuous/linear data. | ||
219 | * Call textsearch_next() to retrieve subsequent matches. | ||
220 | * | ||
221 | * Returns the position of first occurrence of the pattern or | ||
222 | * UINT_MAX if no occurrence was found. | ||
223 | */ | ||
224 | unsigned int textsearch_find_continuous(struct ts_config *conf, | ||
225 | struct ts_state *state, | ||
226 | const void *data, unsigned int len) | ||
227 | { | ||
228 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
229 | |||
230 | conf->get_next_block = get_linear_data; | ||
231 | st->data = data; | ||
232 | st->len = len; | ||
233 | |||
234 | return textsearch_find(conf, state); | ||
235 | } | ||
236 | |||
237 | /** | ||
238 | * textsearch_prepare - Prepare a search | ||
239 | * @algo: name of search algorithm | ||
240 | * @pattern: pattern data | ||
241 | * @len: length of pattern | ||
242 | * @gfp_mask: allocation mask | ||
243 | * @flags: search flags | ||
244 | * | ||
245 | * Looks up the search algorithm module and creates a new textsearch | ||
246 | * configuration for the specified pattern. Upon completion all | ||
247 | * necessary refcnts are held and the configuration must be put back | ||
248 | * using textsearch_put() after usage. | ||
249 | * | ||
250 | * Note: The format of the pattern may not be compatible between | ||
251 | * the various search algorithms. | ||
252 | * | ||
253 | * Returns a new textsearch configuration according to the specified | ||
254 | * parameters or a ERR_PTR(). | ||
255 | */ | ||
256 | struct ts_config *textsearch_prepare(const char *algo, const void *pattern, | ||
257 | unsigned int len, int gfp_mask, int flags) | ||
258 | { | ||
259 | int err = -ENOENT; | ||
260 | struct ts_config *conf; | ||
261 | struct ts_ops *ops; | ||
262 | |||
263 | ops = lookup_ts_algo(algo); | ||
264 | #ifdef CONFIG_KMOD | ||
265 | /* | ||
266 | * Why not always autoload you may ask. Some users are | ||
267 | * in a situation where requesting a module may deadlock, | ||
268 | * especially when the module is located on a NFS mount. | ||
269 | */ | ||
270 | if (ops == NULL && flags & TS_AUTOLOAD) { | ||
271 | request_module("ts_%s", algo); | ||
272 | ops = lookup_ts_algo(algo); | ||
273 | } | ||
274 | #endif | ||
275 | |||
276 | if (ops == NULL) | ||
277 | goto errout; | ||
278 | |||
279 | conf = ops->init(pattern, len, gfp_mask); | ||
280 | if (IS_ERR(conf)) { | ||
281 | err = PTR_ERR(conf); | ||
282 | goto errout; | ||
283 | } | ||
284 | |||
285 | conf->ops = ops; | ||
286 | return conf; | ||
287 | |||
288 | errout: | ||
289 | if (ops) | ||
290 | module_put(ops->owner); | ||
291 | |||
292 | return ERR_PTR(err); | ||
293 | } | ||
294 | |||
295 | /** | ||
296 | * textsearch_destroy - destroy a search configuration | ||
297 | * @conf: search configuration | ||
298 | * | ||
299 | * Releases all references of the configuration and frees | ||
300 | * up the memory. | ||
301 | */ | ||
302 | void textsearch_destroy(struct ts_config *conf) | ||
303 | { | ||
304 | if (conf->ops) { | ||
305 | if (conf->ops->destroy) | ||
306 | conf->ops->destroy(conf); | ||
307 | module_put(conf->ops->owner); | ||
308 | } | ||
309 | |||
310 | kfree(conf); | ||
311 | } | ||
312 | |||
313 | EXPORT_SYMBOL(textsearch_register); | ||
314 | EXPORT_SYMBOL(textsearch_unregister); | ||
315 | EXPORT_SYMBOL(textsearch_prepare); | ||
316 | EXPORT_SYMBOL(textsearch_find_continuous); | ||
317 | EXPORT_SYMBOL(textsearch_destroy); | ||