diff options
author | Ingo Molnar <mingo@kernel.org> | 2014-11-12 09:09:01 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-11-12 09:09:01 -0500 |
commit | 890ca861f868a10617029ffc87eae7d48ea6876c (patch) | |
tree | 713383f4e3bbd94ddb9816a25e6b3911511908f1 /lib/glob.c | |
parent | 03452d27c6cd9cebb59a6bb0fb6bd8557916c263 (diff) | |
parent | 206c5f60a3d902bc4b56dab2de3e88de5eb06108 (diff) |
Merge tag 'v3.18-rc4' into x86/cleanups, to refresh the tree before pulling new changes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'lib/glob.c')
-rw-r--r-- | lib/glob.c | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/lib/glob.c b/lib/glob.c new file mode 100644 index 000000000000..500fc80d23e1 --- /dev/null +++ b/lib/glob.c | |||
@@ -0,0 +1,287 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/glob.h> | ||
3 | |||
4 | /* | ||
5 | * The only reason this code can be compiled as a module is because the | ||
6 | * ATA code that depends on it can be as well. In practice, they're | ||
7 | * both usually compiled in and the module overhead goes away. | ||
8 | */ | ||
9 | MODULE_DESCRIPTION("glob(7) matching"); | ||
10 | MODULE_LICENSE("Dual MIT/GPL"); | ||
11 | |||
12 | /** | ||
13 | * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) | ||
14 | * @pat: Shell-style pattern to match, e.g. "*.[ch]". | ||
15 | * @str: String to match. The pattern must match the entire string. | ||
16 | * | ||
17 | * Perform shell-style glob matching, returning true (1) if the match | ||
18 | * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). | ||
19 | * | ||
20 | * Pattern metacharacters are ?, *, [ and \. | ||
21 | * (And, inside character classes, !, - and ].) | ||
22 | * | ||
23 | * This is small and simple implementation intended for device blacklists | ||
24 | * where a string is matched against a number of patterns. Thus, it | ||
25 | * does not preprocess the patterns. It is non-recursive, and run-time | ||
26 | * is at most quadratic: strlen(@str)*strlen(@pat). | ||
27 | * | ||
28 | * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); | ||
29 | * it takes 6 passes over the pattern before matching the string. | ||
30 | * | ||
31 | * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT | ||
32 | * treat / or leading . specially; it isn't actually used for pathnames. | ||
33 | * | ||
34 | * Note that according to glob(7) (and unlike bash), character classes | ||
35 | * are complemented by a leading !; this does not support the regex-style | ||
36 | * [^a-z] syntax. | ||
37 | * | ||
38 | * An opening bracket without a matching close is matched literally. | ||
39 | */ | ||
40 | bool __pure glob_match(char const *pat, char const *str) | ||
41 | { | ||
42 | /* | ||
43 | * Backtrack to previous * on mismatch and retry starting one | ||
44 | * character later in the string. Because * matches all characters | ||
45 | * (no exception for /), it can be easily proved that there's | ||
46 | * never a need to backtrack multiple levels. | ||
47 | */ | ||
48 | char const *back_pat = NULL, *back_str = back_str; | ||
49 | |||
50 | /* | ||
51 | * Loop over each token (character or class) in pat, matching | ||
52 | * it against the remaining unmatched tail of str. Return false | ||
53 | * on mismatch, or true after matching the trailing nul bytes. | ||
54 | */ | ||
55 | for (;;) { | ||
56 | unsigned char c = *str++; | ||
57 | unsigned char d = *pat++; | ||
58 | |||
59 | switch (d) { | ||
60 | case '?': /* Wildcard: anything but nul */ | ||
61 | if (c == '\0') | ||
62 | return false; | ||
63 | break; | ||
64 | case '*': /* Any-length wildcard */ | ||
65 | if (*pat == '\0') /* Optimize trailing * case */ | ||
66 | return true; | ||
67 | back_pat = pat; | ||
68 | back_str = --str; /* Allow zero-length match */ | ||
69 | break; | ||
70 | case '[': { /* Character class */ | ||
71 | bool match = false, inverted = (*pat == '!'); | ||
72 | char const *class = pat + inverted; | ||
73 | unsigned char a = *class++; | ||
74 | |||
75 | /* | ||
76 | * Iterate over each span in the character class. | ||
77 | * A span is either a single character a, or a | ||
78 | * range a-b. The first span may begin with ']'. | ||
79 | */ | ||
80 | do { | ||
81 | unsigned char b = a; | ||
82 | |||
83 | if (a == '\0') /* Malformed */ | ||
84 | goto literal; | ||
85 | |||
86 | if (class[0] == '-' && class[1] != ']') { | ||
87 | b = class[1]; | ||
88 | |||
89 | if (b == '\0') | ||
90 | goto literal; | ||
91 | |||
92 | class += 2; | ||
93 | /* Any special action if a > b? */ | ||
94 | } | ||
95 | match |= (a <= c && c <= b); | ||
96 | } while ((a = *class++) != ']'); | ||
97 | |||
98 | if (match == inverted) | ||
99 | goto backtrack; | ||
100 | pat = class; | ||
101 | } | ||
102 | break; | ||
103 | case '\\': | ||
104 | d = *pat++; | ||
105 | /*FALLTHROUGH*/ | ||
106 | default: /* Literal character */ | ||
107 | literal: | ||
108 | if (c == d) { | ||
109 | if (d == '\0') | ||
110 | return true; | ||
111 | break; | ||
112 | } | ||
113 | backtrack: | ||
114 | if (c == '\0' || !back_pat) | ||
115 | return false; /* No point continuing */ | ||
116 | /* Try again from last *, one character later in str. */ | ||
117 | pat = back_pat; | ||
118 | str = ++back_str; | ||
119 | break; | ||
120 | } | ||
121 | } | ||
122 | } | ||
123 | EXPORT_SYMBOL(glob_match); | ||
124 | |||
125 | |||
126 | #ifdef CONFIG_GLOB_SELFTEST | ||
127 | |||
128 | #include <linux/printk.h> | ||
129 | #include <linux/moduleparam.h> | ||
130 | |||
131 | /* Boot with "glob.verbose=1" to show successful tests, too */ | ||
132 | static bool verbose = false; | ||
133 | module_param(verbose, bool, 0); | ||
134 | |||
135 | struct glob_test { | ||
136 | char const *pat, *str; | ||
137 | bool expected; | ||
138 | }; | ||
139 | |||
140 | static bool __pure __init test(char const *pat, char const *str, bool expected) | ||
141 | { | ||
142 | bool match = glob_match(pat, str); | ||
143 | bool success = match == expected; | ||
144 | |||
145 | /* Can't get string literals into a particular section, so... */ | ||
146 | static char const msg_error[] __initconst = | ||
147 | KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; | ||
148 | static char const msg_ok[] __initconst = | ||
149 | KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; | ||
150 | static char const mismatch[] __initconst = "mismatch"; | ||
151 | char const *message; | ||
152 | |||
153 | if (!success) | ||
154 | message = msg_error; | ||
155 | else if (verbose) | ||
156 | message = msg_ok; | ||
157 | else | ||
158 | return success; | ||
159 | |||
160 | printk(message, pat, str, mismatch + 3*match); | ||
161 | return success; | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * The tests are all jammed together in one array to make it simpler | ||
166 | * to place that array in the .init.rodata section. The obvious | ||
167 | * "array of structures containing char *" has no way to force the | ||
168 | * pointed-to strings to be in a particular section. | ||
169 | * | ||
170 | * Anyway, a test consists of: | ||
171 | * 1. Expected glob_match result: '1' or '0'. | ||
172 | * 2. Pattern to match: null-terminated string | ||
173 | * 3. String to match against: null-terminated string | ||
174 | * | ||
175 | * The list of tests is terminated with a final '\0' instead of | ||
176 | * a glob_match result character. | ||
177 | */ | ||
178 | static char const glob_tests[] __initconst = | ||
179 | /* Some basic tests */ | ||
180 | "1" "a\0" "a\0" | ||
181 | "0" "a\0" "b\0" | ||
182 | "0" "a\0" "aa\0" | ||
183 | "0" "a\0" "\0" | ||
184 | "1" "\0" "\0" | ||
185 | "0" "\0" "a\0" | ||
186 | /* Simple character class tests */ | ||
187 | "1" "[a]\0" "a\0" | ||
188 | "0" "[a]\0" "b\0" | ||
189 | "0" "[!a]\0" "a\0" | ||
190 | "1" "[!a]\0" "b\0" | ||
191 | "1" "[ab]\0" "a\0" | ||
192 | "1" "[ab]\0" "b\0" | ||
193 | "0" "[ab]\0" "c\0" | ||
194 | "1" "[!ab]\0" "c\0" | ||
195 | "1" "[a-c]\0" "b\0" | ||
196 | "0" "[a-c]\0" "d\0" | ||
197 | /* Corner cases in character class parsing */ | ||
198 | "1" "[a-c-e-g]\0" "-\0" | ||
199 | "0" "[a-c-e-g]\0" "d\0" | ||
200 | "1" "[a-c-e-g]\0" "f\0" | ||
201 | "1" "[]a-ceg-ik[]\0" "a\0" | ||
202 | "1" "[]a-ceg-ik[]\0" "]\0" | ||
203 | "1" "[]a-ceg-ik[]\0" "[\0" | ||
204 | "1" "[]a-ceg-ik[]\0" "h\0" | ||
205 | "0" "[]a-ceg-ik[]\0" "f\0" | ||
206 | "0" "[!]a-ceg-ik[]\0" "h\0" | ||
207 | "0" "[!]a-ceg-ik[]\0" "]\0" | ||
208 | "1" "[!]a-ceg-ik[]\0" "f\0" | ||
209 | /* Simple wild cards */ | ||
210 | "1" "?\0" "a\0" | ||
211 | "0" "?\0" "aa\0" | ||
212 | "0" "??\0" "a\0" | ||
213 | "1" "?x?\0" "axb\0" | ||
214 | "0" "?x?\0" "abx\0" | ||
215 | "0" "?x?\0" "xab\0" | ||
216 | /* Asterisk wild cards (backtracking) */ | ||
217 | "0" "*??\0" "a\0" | ||
218 | "1" "*??\0" "ab\0" | ||
219 | "1" "*??\0" "abc\0" | ||
220 | "1" "*??\0" "abcd\0" | ||
221 | "0" "??*\0" "a\0" | ||
222 | "1" "??*\0" "ab\0" | ||
223 | "1" "??*\0" "abc\0" | ||
224 | "1" "??*\0" "abcd\0" | ||
225 | "0" "?*?\0" "a\0" | ||
226 | "1" "?*?\0" "ab\0" | ||
227 | "1" "?*?\0" "abc\0" | ||
228 | "1" "?*?\0" "abcd\0" | ||
229 | "1" "*b\0" "b\0" | ||
230 | "1" "*b\0" "ab\0" | ||
231 | "0" "*b\0" "ba\0" | ||
232 | "1" "*b\0" "bb\0" | ||
233 | "1" "*b\0" "abb\0" | ||
234 | "1" "*b\0" "bab\0" | ||
235 | "1" "*bc\0" "abbc\0" | ||
236 | "1" "*bc\0" "bc\0" | ||
237 | "1" "*bc\0" "bbc\0" | ||
238 | "1" "*bc\0" "bcbc\0" | ||
239 | /* Multiple asterisks (complex backtracking) */ | ||
240 | "1" "*ac*\0" "abacadaeafag\0" | ||
241 | "1" "*ac*ae*ag*\0" "abacadaeafag\0" | ||
242 | "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" | ||
243 | "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" | ||
244 | "1" "*abcd*\0" "abcabcabcabcdefg\0" | ||
245 | "1" "*ab*cd*\0" "abcabcabcabcdefg\0" | ||
246 | "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" | ||
247 | "0" "*abcd*\0" "abcabcabcabcefg\0" | ||
248 | "0" "*ab*cd*\0" "abcabcabcabcefg\0"; | ||
249 | |||
250 | static int __init glob_init(void) | ||
251 | { | ||
252 | unsigned successes = 0; | ||
253 | unsigned n = 0; | ||
254 | char const *p = glob_tests; | ||
255 | static char const message[] __initconst = | ||
256 | KERN_INFO "glob: %u self-tests passed, %u failed\n"; | ||
257 | |||
258 | /* | ||
259 | * Tests are jammed together in a string. The first byte is '1' | ||
260 | * or '0' to indicate the expected outcome, or '\0' to indicate the | ||
261 | * end of the tests. Then come two null-terminated strings: the | ||
262 | * pattern and the string to match it against. | ||
263 | */ | ||
264 | while (*p) { | ||
265 | bool expected = *p++ & 1; | ||
266 | char const *pat = p; | ||
267 | |||
268 | p += strlen(p) + 1; | ||
269 | successes += test(pat, p, expected); | ||
270 | p += strlen(p) + 1; | ||
271 | n++; | ||
272 | } | ||
273 | |||
274 | n -= successes; | ||
275 | printk(message, successes, n); | ||
276 | |||
277 | /* What's the errno for "kernel bug detected"? Guess... */ | ||
278 | return n ? -ECANCELED : 0; | ||
279 | } | ||
280 | |||
281 | /* We need a dummy exit function to allow unload */ | ||
282 | static void __exit glob_fini(void) { } | ||
283 | |||
284 | module_init(glob_init); | ||
285 | module_exit(glob_fini); | ||
286 | |||
287 | #endif /* CONFIG_GLOB_SELFTEST */ | ||