diff options
author | Thomas Graf <tgraf@suug.ch> | 2005-06-23 23:59:16 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-06-23 23:59:16 -0400 |
commit | 6408f79cce401e1bfecf923e7156f84f96e021e3 (patch) | |
tree | 203624ffacf60d364293adc47d2f59f6ba81dd35 /include/linux | |
parent | df3fb93ad9ec0b20c785c0ad82d42d159a1af272 (diff) |
[LIB]: Naive finite state machine based textsearch
A finite state machine consists of n states (struct ts_fsm_token)
representing the pattern as a finite automation. The data is read
sequentially on a octet basis. Every state token specifies the number
of recurrences and the type of value accepted which can be either a
specific character or ctype based set of characters. The available
type of recurrences include 1, (0|1), [0 n], and [1 n].
The algorithm differs between strict/non-strict mode specyfing
whether the pattern has to start at the first octect. Strict mode
is enabled by default and can be disabled by inserting
TS_FSM_HEAD_IGNORE as the first token in the chain.
The runtime performance of the algorithm should be around O(n),
however while in strict mode the average runtime can be better.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/textsearch_fsm.h | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/include/linux/textsearch_fsm.h b/include/linux/textsearch_fsm.h new file mode 100644 index 000000000000..fdfa078c66e5 --- /dev/null +++ b/include/linux/textsearch_fsm.h | |||
@@ -0,0 +1,48 @@ | |||
1 | #ifndef __LINUX_TEXTSEARCH_FSM_H | ||
2 | #define __LINUX_TEXTSEARCH_FSM_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | enum { | ||
7 | TS_FSM_SPECIFIC, /* specific character */ | ||
8 | TS_FSM_WILDCARD, /* any character */ | ||
9 | TS_FSM_DIGIT, /* isdigit() */ | ||
10 | TS_FSM_XDIGIT, /* isxdigit() */ | ||
11 | TS_FSM_PRINT, /* isprint() */ | ||
12 | TS_FSM_ALPHA, /* isalpha() */ | ||
13 | TS_FSM_ALNUM, /* isalnum() */ | ||
14 | TS_FSM_ASCII, /* isascii() */ | ||
15 | TS_FSM_CNTRL, /* iscntrl() */ | ||
16 | TS_FSM_GRAPH, /* isgraph() */ | ||
17 | TS_FSM_LOWER, /* islower() */ | ||
18 | TS_FSM_UPPER, /* isupper() */ | ||
19 | TS_FSM_PUNCT, /* ispunct() */ | ||
20 | TS_FSM_SPACE, /* isspace() */ | ||
21 | __TS_FSM_TYPE_MAX, | ||
22 | }; | ||
23 | #define TS_FSM_TYPE_MAX (__TS_FSM_TYPE_MAX - 1) | ||
24 | |||
25 | enum { | ||
26 | TS_FSM_SINGLE, /* 1 occurrence */ | ||
27 | TS_FSM_PERHAPS, /* 1 or 0 occurrence */ | ||
28 | TS_FSM_ANY, /* 0..n occurrences */ | ||
29 | TS_FSM_MULTI, /* 1..n occurrences */ | ||
30 | TS_FSM_HEAD_IGNORE, /* 0..n ignored occurrences at head */ | ||
31 | __TS_FSM_RECUR_MAX, | ||
32 | }; | ||
33 | #define TS_FSM_RECUR_MAX (__TS_FSM_RECUR_MAX - 1) | ||
34 | |||
35 | /** | ||
36 | * struct ts_fsm_token - state machine token (state) | ||
37 | * @type: type of token | ||
38 | * @recur: number of recurrences | ||
39 | * @value: character value for TS_FSM_SPECIFIC | ||
40 | */ | ||
41 | struct ts_fsm_token | ||
42 | { | ||
43 | __u16 type; | ||
44 | __u8 recur; | ||
45 | __u8 value; | ||
46 | }; | ||
47 | |||
48 | #endif | ||