diff options
| author | Thomas Graf <tgraf@suug.ch> | 2005-06-23 23:49:30 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2005-06-23 23:49:30 -0400 |
| commit | 2de4ff7bd658c97fb357efa3095a509674dacb5a (patch) | |
| tree | 49036dbf594317a6a17ff4e56f65158a6aeacbda /lib | |
| parent | 5f8ef48d240963093451bcf83df89f1a1364f51d (diff) | |
[LIB]: Textsearch infrastructure.
The textsearch infrastructure provides text searching
facitilies for both linear and non-linear data.
Individual search algorithms are implemented in modules
and chosen by the user.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Kconfig | 8 | ||||
| -rw-r--r-- | lib/Makefile | 2 | ||||
| -rw-r--r-- | lib/textsearch.c | 317 |
3 files changed, 326 insertions, 1 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 2d4d4e3bc4aa..5bc2d523e6d1 100644 --- a/lib/Kconfig +++ b/lib/Kconfig | |||
| @@ -63,5 +63,11 @@ config REED_SOLOMON_ENC16 | |||
| 63 | config REED_SOLOMON_DEC16 | 63 | config REED_SOLOMON_DEC16 |
| 64 | boolean | 64 | boolean |
| 65 | 65 | ||
| 66 | endmenu | 66 | config TEXTSEARCH |
| 67 | boolean "Textsearch infrastructure" | ||
| 68 | default y | ||
| 69 | help | ||
| 70 | Say Y here if you want to provide a textsearch infrastructure | ||
| 71 | to other subsystems. | ||
| 67 | 72 | ||
| 73 | endmenu | ||
diff --git a/lib/Makefile b/lib/Makefile index dcb4231916e2..3e917436ad60 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -36,6 +36,8 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ | |||
| 36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | 36 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ |
| 37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 37 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
| 38 | 38 | ||
| 39 | lib-$(CONFIG_TEXTSEARCH) += textsearch.o | ||
| 40 | |||
| 39 | hostprogs-y := gen_crc32table | 41 | hostprogs-y := gen_crc32table |
| 40 | clean-files := crc32table.h | 42 | clean-files := crc32table.h |
| 41 | 43 | ||
diff --git a/lib/textsearch.c b/lib/textsearch.c new file mode 100644 index 000000000000..1e934c196f0f --- /dev/null +++ b/lib/textsearch.c | |||
| @@ -0,0 +1,317 @@ | |||
| 1 | /* | ||
| 2 | * lib/textsearch.c Generic text search interface | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version | ||
| 7 | * 2 of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
| 10 | * Pablo Neira Ayuso <pablo@eurodev.net> | ||
| 11 | * | ||
| 12 | * ========================================================================== | ||
| 13 | * | ||
| 14 | * INTRODUCTION | ||
| 15 | * | ||
| 16 | * The textsearch infrastructure provides text searching facitilies for | ||
| 17 | * both linear and non-linear data. Individual search algorithms are | ||
| 18 | * implemented in modules and chosen by the user. | ||
| 19 | * | ||
| 20 | * ARCHITECTURE | ||
| 21 | * | ||
| 22 | * User | ||
| 23 | * +----------------+ | ||
| 24 | * | finish()|<--------------(6)-----------------+ | ||
| 25 | * |get_next_block()|<--------------(5)---------------+ | | ||
| 26 | * | | Algorithm | | | ||
| 27 | * | | +------------------------------+ | ||
| 28 | * | | | init() find() destroy() | | ||
| 29 | * | | +------------------------------+ | ||
| 30 | * | | Core API ^ ^ ^ | ||
| 31 | * | | +---------------+ (2) (4) (8) | ||
| 32 | * | (1)|----->| prepare() |---+ | | | ||
| 33 | * | (3)|----->| find()/next() |-----------+ | | ||
| 34 | * | (7)|----->| destroy() |----------------------+ | ||
| 35 | * +----------------+ +---------------+ | ||
| 36 | * | ||
| 37 | * (1) User configures a search by calling _prepare() specifying the | ||
| 38 | * search parameters such as the pattern and algorithm name. | ||
| 39 | * (2) Core requests the algorithm to allocate and initialize a search | ||
| 40 | * configuration according to the specified parameters. | ||
| 41 | * (3) User starts the search(es) by calling _find() or _next() to | ||
| 42 | * fetch subsequent occurrences. A state variable is provided | ||
| 43 | * to the algorihtm to store persistant variables. | ||
| 44 | * (4) Core eventually resets the search offset and forwards the find() | ||
| 45 | * request to the algorithm. | ||
| 46 | * (5) Algorithm calls get_next_block() provided by the user continously | ||
| 47 | * to fetch the data to be searched in block by block. | ||
| 48 | * (6) Algorithm invokes finish() after the last call to get_next_block | ||
| 49 | * to clean up any leftovers from get_next_block. (Optional) | ||
| 50 | * (7) User destroys the configuration by calling _destroy(). | ||
| 51 | * (8) Core notifies the algorithm to destroy algorithm specific | ||
| 52 | * allocations. (Optional) | ||
| 53 | * | ||
| 54 | * USAGE | ||
| 55 | * | ||
| 56 | * Before a search can be performed, a configuration must be created | ||
| 57 | * by calling textsearch_prepare() specyfing the searching algorithm and | ||
| 58 | * the pattern to look for. The returned configuration may then be used | ||
| 59 | * for an arbitary amount of times and even in parallel as long as a | ||
| 60 | * separate struct ts_state variable is provided to every instance. | ||
| 61 | * | ||
| 62 | * The actual search is performed by either calling textsearch_find_- | ||
| 63 | * continuous() for linear data or by providing an own get_next_block() | ||
| 64 | * implementation and calling textsearch_find(). Both functions return | ||
| 65 | * the position of the first occurrence of the patern or UINT_MAX if | ||
| 66 | * no match was found. Subsequent occurences can be found by calling | ||
| 67 | * textsearch_next() regardless of the linearity of the data. | ||
| 68 | * | ||
| 69 | * Once you're done using a configuration it must be given back via | ||
| 70 | * textsearch_destroy. | ||
| 71 | * | ||
| 72 | * EXAMPLE | ||
| 73 | * | ||
| 74 | * int pos; | ||
| 75 | * struct ts_config *conf; | ||
| 76 | * struct ts_state state; | ||
| 77 | * const char *pattern = "chicken"; | ||
| 78 | * const char *example = "We dance the funky chicken"; | ||
| 79 | * | ||
| 80 | * conf = textsearch_prepare("kmp", pattern, strlen(pattern), | ||
| 81 | * GFP_KERNEL, TS_AUTOLOAD); | ||
| 82 | * if (IS_ERR(conf)) { | ||
| 83 | * err = PTR_ERR(conf); | ||
| 84 | * goto errout; | ||
| 85 | * } | ||
| 86 | * | ||
| 87 | * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); | ||
| 88 | * if (pos != UINT_MAX) | ||
| 89 | * panic("Oh my god, dancing chickens at %d\n", pos); | ||
| 90 | * | ||
| 91 | * textsearch_destroy(conf); | ||
| 92 | * | ||
| 93 | * ========================================================================== | ||
| 94 | */ | ||
| 95 | |||
| 96 | #include <linux/config.h> | ||
| 97 | #include <linux/module.h> | ||
| 98 | #include <linux/types.h> | ||
| 99 | #include <linux/string.h> | ||
| 100 | #include <linux/init.h> | ||
| 101 | #include <linux/rcupdate.h> | ||
| 102 | #include <linux/err.h> | ||
| 103 | #include <linux/textsearch.h> | ||
| 104 | |||
| 105 | static LIST_HEAD(ts_ops); | ||
| 106 | static DEFINE_SPINLOCK(ts_mod_lock); | ||
| 107 | |||
| 108 | static inline struct ts_ops *lookup_ts_algo(const char *name) | ||
| 109 | { | ||
| 110 | struct ts_ops *o; | ||
| 111 | |||
| 112 | rcu_read_lock(); | ||
| 113 | list_for_each_entry_rcu(o, &ts_ops, list) { | ||
| 114 | if (!strcmp(name, o->name)) { | ||
| 115 | if (!try_module_get(o->owner)) | ||
| 116 | o = NULL; | ||
| 117 | rcu_read_unlock(); | ||
| 118 | return o; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | rcu_read_unlock(); | ||
| 122 | |||
| 123 | return NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | /** | ||
| 127 | * textsearch_register - register a textsearch module | ||
| 128 | * @ops: operations lookup table | ||
| 129 | * | ||
| 130 | * This function must be called by textsearch modules to announce | ||
| 131 | * their presence. The specified &@ops must have %name set to a | ||
| 132 | * unique identifier and the callbacks find(), init(), get_pattern(), | ||
| 133 | * and get_pattern_len() must be implemented. | ||
| 134 | * | ||
| 135 | * Returns 0 or -EEXISTS if another module has already registered | ||
| 136 | * with same name. | ||
| 137 | */ | ||
| 138 | int textsearch_register(struct ts_ops *ops) | ||
| 139 | { | ||
| 140 | int err = -EEXIST; | ||
| 141 | struct ts_ops *o; | ||
| 142 | |||
| 143 | if (ops->name == NULL || ops->find == NULL || ops->init == NULL || | ||
| 144 | ops->get_pattern == NULL || ops->get_pattern_len == NULL) | ||
| 145 | return -EINVAL; | ||
| 146 | |||
| 147 | spin_lock(&ts_mod_lock); | ||
| 148 | list_for_each_entry(o, &ts_ops, list) { | ||
| 149 | if (!strcmp(ops->name, o->name)) | ||
| 150 | goto errout; | ||
| 151 | } | ||
| 152 | |||
| 153 | list_add_tail_rcu(&ops->list, &ts_ops); | ||
| 154 | err = 0; | ||
| 155 | errout: | ||
| 156 | spin_unlock(&ts_mod_lock); | ||
| 157 | return err; | ||
| 158 | } | ||
| 159 | |||
| 160 | /** | ||
| 161 | * textsearch_unregister - unregister a textsearch module | ||
| 162 | * @ops: operations lookup table | ||
| 163 | * | ||
| 164 | * This function must be called by textsearch modules to announce | ||
| 165 | * their disappearance for examples when the module gets unloaded. | ||
| 166 | * The &ops parameter must be the same as the one during the | ||
| 167 | * registration. | ||
| 168 | * | ||
| 169 | * Returns 0 on success or -ENOENT if no matching textsearch | ||
| 170 | * registration was found. | ||
| 171 | */ | ||
| 172 | int textsearch_unregister(struct ts_ops *ops) | ||
| 173 | { | ||
| 174 | int err = 0; | ||
| 175 | struct ts_ops *o; | ||
| 176 | |||
| 177 | spin_lock(&ts_mod_lock); | ||
| 178 | list_for_each_entry(o, &ts_ops, list) { | ||
| 179 | if (o == ops) { | ||
| 180 | list_del_rcu(&o->list); | ||
| 181 | goto out; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | err = -ENOENT; | ||
| 186 | out: | ||
| 187 | spin_unlock(&ts_mod_lock); | ||
| 188 | return err; | ||
| 189 | } | ||
| 190 | |||
| 191 | struct ts_linear_state | ||
| 192 | { | ||
| 193 | unsigned int len; | ||
| 194 | const void *data; | ||
| 195 | }; | ||
| 196 | |||
| 197 | static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, | ||
| 198 | struct ts_config *conf, | ||
| 199 | struct ts_state *state) | ||
| 200 | { | ||
| 201 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
| 202 | |||
| 203 | if (likely(consumed < st->len)) { | ||
| 204 | *dst = st->data + consumed; | ||
| 205 | return st->len - consumed; | ||
| 206 | } | ||
| 207 | |||
| 208 | return 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | /** | ||
| 212 | * textsearch_find_continuous - search a pattern in continuous/linear data | ||
| 213 | * @conf: search configuration | ||
| 214 | * @state: search state | ||
| 215 | * @data: data to search in | ||
| 216 | * @len: length of data | ||
| 217 | * | ||
| 218 | * A simplified version of textsearch_find() for continuous/linear data. | ||
| 219 | * Call textsearch_next() to retrieve subsequent matches. | ||
| 220 | * | ||
| 221 | * Returns the position of first occurrence of the pattern or | ||
| 222 | * UINT_MAX if no occurrence was found. | ||
| 223 | */ | ||
| 224 | unsigned int textsearch_find_continuous(struct ts_config *conf, | ||
| 225 | struct ts_state *state, | ||
| 226 | const void *data, unsigned int len) | ||
| 227 | { | ||
| 228 | struct ts_linear_state *st = (struct ts_linear_state *) state->cb; | ||
| 229 | |||
| 230 | conf->get_next_block = get_linear_data; | ||
| 231 | st->data = data; | ||
| 232 | st->len = len; | ||
| 233 | |||
| 234 | return textsearch_find(conf, state); | ||
| 235 | } | ||
| 236 | |||
| 237 | /** | ||
| 238 | * textsearch_prepare - Prepare a search | ||
| 239 | * @algo: name of search algorithm | ||
| 240 | * @pattern: pattern data | ||
| 241 | * @len: length of pattern | ||
| 242 | * @gfp_mask: allocation mask | ||
| 243 | * @flags: search flags | ||
| 244 | * | ||
| 245 | * Looks up the search algorithm module and creates a new textsearch | ||
| 246 | * configuration for the specified pattern. Upon completion all | ||
| 247 | * necessary refcnts are held and the configuration must be put back | ||
| 248 | * using textsearch_put() after usage. | ||
| 249 | * | ||
| 250 | * Note: The format of the pattern may not be compatible between | ||
| 251 | * the various search algorithms. | ||
| 252 | * | ||
| 253 | * Returns a new textsearch configuration according to the specified | ||
| 254 | * parameters or a ERR_PTR(). | ||
| 255 | */ | ||
| 256 | struct ts_config *textsearch_prepare(const char *algo, const void *pattern, | ||
| 257 | unsigned int len, int gfp_mask, int flags) | ||
| 258 | { | ||
| 259 | int err = -ENOENT; | ||
| 260 | struct ts_config *conf; | ||
| 261 | struct ts_ops *ops; | ||
| 262 | |||
| 263 | ops = lookup_ts_algo(algo); | ||
| 264 | #ifdef CONFIG_KMOD | ||
| 265 | /* | ||
| 266 | * Why not always autoload you may ask. Some users are | ||
| 267 | * in a situation where requesting a module may deadlock, | ||
| 268 | * especially when the module is located on a NFS mount. | ||
| 269 | */ | ||
| 270 | if (ops == NULL && flags & TS_AUTOLOAD) { | ||
| 271 | request_module("ts_%s", algo); | ||
| 272 | ops = lookup_ts_algo(algo); | ||
| 273 | } | ||
| 274 | #endif | ||
| 275 | |||
| 276 | if (ops == NULL) | ||
| 277 | goto errout; | ||
| 278 | |||
| 279 | conf = ops->init(pattern, len, gfp_mask); | ||
| 280 | if (IS_ERR(conf)) { | ||
| 281 | err = PTR_ERR(conf); | ||
| 282 | goto errout; | ||
| 283 | } | ||
| 284 | |||
| 285 | conf->ops = ops; | ||
| 286 | return conf; | ||
| 287 | |||
| 288 | errout: | ||
| 289 | if (ops) | ||
| 290 | module_put(ops->owner); | ||
| 291 | |||
| 292 | return ERR_PTR(err); | ||
| 293 | } | ||
| 294 | |||
| 295 | /** | ||
| 296 | * textsearch_destroy - destroy a search configuration | ||
| 297 | * @conf: search configuration | ||
| 298 | * | ||
| 299 | * Releases all references of the configuration and frees | ||
| 300 | * up the memory. | ||
| 301 | */ | ||
| 302 | void textsearch_destroy(struct ts_config *conf) | ||
| 303 | { | ||
| 304 | if (conf->ops) { | ||
| 305 | if (conf->ops->destroy) | ||
| 306 | conf->ops->destroy(conf); | ||
| 307 | module_put(conf->ops->owner); | ||
| 308 | } | ||
| 309 | |||
| 310 | kfree(conf); | ||
| 311 | } | ||
| 312 | |||
| 313 | EXPORT_SYMBOL(textsearch_register); | ||
| 314 | EXPORT_SYMBOL(textsearch_unregister); | ||
| 315 | EXPORT_SYMBOL(textsearch_prepare); | ||
| 316 | EXPORT_SYMBOL(textsearch_find_continuous); | ||
| 317 | EXPORT_SYMBOL(textsearch_destroy); | ||
