diff options
Diffstat (limited to 'lib/glob.c')
| -rw-r--r-- | lib/glob.c | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/lib/glob.c b/lib/glob.c new file mode 100644 index 000000000000..500fc80d23e1 --- /dev/null +++ b/lib/glob.c | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | #include <linux/module.h> | ||
| 2 | #include <linux/glob.h> | ||
| 3 | |||
| 4 | /* | ||
| 5 | * The only reason this code can be compiled as a module is because the | ||
| 6 | * ATA code that depends on it can be as well. In practice, they're | ||
| 7 | * both usually compiled in and the module overhead goes away. | ||
| 8 | */ | ||
| 9 | MODULE_DESCRIPTION("glob(7) matching"); | ||
| 10 | MODULE_LICENSE("Dual MIT/GPL"); | ||
| 11 | |||
/**
 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
 * @pat: Shell-style pattern to match, e.g. "*.[ch]".
 * @str: String to match.  The pattern must match the entire string.
 *
 * Perform shell-style glob matching, returning true (1) if the match
 * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
 *
 * Pattern metacharacters are ?, *, [ and \.
 * (And, inside character classes, !, - and ].)
 *
 * This is a small and simple implementation intended for device blacklists
 * where a string is matched against a number of patterns.  Thus, it
 * does not preprocess the patterns.  It is non-recursive, and run-time
 * is at most quadratic: strlen(@str)*strlen(@pat).
 *
 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
 * it takes 6 passes over the pattern before matching the string.
 *
 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
 * treat / or leading . specially; it isn't actually used for pathnames.
 *
 * Note that according to glob(7) (and unlike bash), character classes
 * are complemented by a leading !; this does not support the regex-style
 * [^a-z] syntax.
 *
 * An opening bracket without a matching close is matched literally.
 *
 * A character class never matches the terminating nul of @str, not even
 * an inverted class such as [!a].
 *
 * Return: true if @pat matches all of @str, false otherwise.
 */
bool __pure glob_match(char const *pat, char const *str)
{
	/*
	 * Backtrack to previous * on mismatch and retry starting one
	 * character later in the string.  Because * matches all characters
	 * (no exception for /), it can be easily proved that there's
	 * never a need to backtrack multiple levels.
	 *
	 * back_str is only ever read after back_pat has been set, and both
	 * are assigned together, so NULL is never dereferenced.
	 */
	char const *back_pat = NULL, *back_str = NULL;

	/*
	 * Loop over each token (character or class) in pat, matching
	 * it against the remaining unmatched tail of str.  Return false
	 * on mismatch, or true after matching the trailing nul bytes.
	 */
	for (;;) {
		unsigned char c = *str++;
		unsigned char d = *pat++;

		switch (d) {
		case '?':	/* Wildcard: anything but nul */
			if (c == '\0')
				return false;
			break;
		case '*':	/* Any-length wildcard */
			if (*pat == '\0')	/* Optimize trailing * case */
				return true;
			back_pat = pat;
			back_str = --str;	/* Allow zero-length match */
			break;
		case '[': {	/* Character class */
			bool match = false, inverted = (*pat == '!');
			char const *class = pat + inverted;
			unsigned char a = *class++;

			/*
			 * Nothing in a pattern can match the end of the
			 * string.  Without this check an inverted class
			 * would accept the nul and the next iteration
			 * would read past the end of str.  Backtracking
			 * cannot help either, since it gives up as soon
			 * as c is nul.
			 */
			if (c == '\0')
				return false;

			/*
			 * Iterate over each span in the character class.
			 * A span is either a single character a, or a
			 * range a-b.  The first span may begin with ']'.
			 */
			do {
				unsigned char b = a;

				if (a == '\0')	/* Malformed */
					goto literal;

				if (class[0] == '-' && class[1] != ']') {
					b = class[1];

					if (b == '\0')
						goto literal;

					class += 2;
					/* A reversed range (a > b) matches nothing. */
				}
				match |= (a <= c && c <= b);
			} while ((a = *class++) != ']');

			if (match == inverted)
				goto backtrack;
			pat = class;
			}
			break;
		case '\\':
			d = *pat++;
			/*FALLTHROUGH*/
		default:	/* Literal character */
literal:
			if (c == d) {
				if (d == '\0')
					return true;
				break;
			}
backtrack:
			if (c == '\0' || !back_pat)
				return false;	/* No point continuing */
			/* Try again from last *, one character later in str. */
			pat = back_pat;
			str = ++back_str;
			break;
		}
	}
}
| 123 | EXPORT_SYMBOL(glob_match); | ||
| 124 | |||
| 125 | |||
| 126 | #ifdef CONFIG_GLOB_SELFTEST | ||
| 127 | |||
| 128 | #include <linux/printk.h> | ||
| 129 | #include <linux/moduleparam.h> | ||
| 130 | |||
| 131 | /* Boot with "glob.verbose=1" to show successful tests, too */ | ||
| 132 | static bool verbose = false; | ||
| 133 | module_param(verbose, bool, 0); | ||
| 134 | |||
/*
 * One self-test case in structured form.
 *
 * NOTE(review): nothing visible in this file uses this type; the test
 * table is stored as a packed string instead.
 */
struct glob_test {
	char const *pat;	/* Pattern to match */
	char const *str;	/* String to match the pattern against */
	bool expected;		/* Expected glob_match() result */
};
| 139 | |||
| 140 | static bool __pure __init test(char const *pat, char const *str, bool expected) | ||
| 141 | { | ||
| 142 | bool match = glob_match(pat, str); | ||
| 143 | bool success = match == expected; | ||
| 144 | |||
| 145 | /* Can't get string literals into a particular section, so... */ | ||
| 146 | static char const msg_error[] __initconst = | ||
| 147 | KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; | ||
| 148 | static char const msg_ok[] __initconst = | ||
| 149 | KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; | ||
| 150 | static char const mismatch[] __initconst = "mismatch"; | ||
| 151 | char const *message; | ||
| 152 | |||
| 153 | if (!success) | ||
| 154 | message = msg_error; | ||
| 155 | else if (verbose) | ||
| 156 | message = msg_ok; | ||
| 157 | else | ||
| 158 | return success; | ||
| 159 | |||
| 160 | printk(message, pat, str, mismatch + 3*match); | ||
| 161 | return success; | ||
| 162 | } | ||
| 163 | |||
/*
 * Self-test table.
 *
 * All tests are packed into one character array so that the whole table,
 * strings included, can be placed in the .init.rodata section.  An array
 * of structures holding char pointers would not allow the pointed-to
 * strings to be forced into a particular section.
 *
 * Each entry is laid out as:
 *   1. The expected glob_match() result: the character '1' or '0'.
 *   2. The pattern, as a nul-terminated string.
 *   3. The string to match against, as a nul-terminated string.
 *
 * The table ends where a '\0' appears in place of a result character;
 * that byte is the array's own terminating nul.
 *
 * GLOB_TEST() only appends the two nul separators; it exists so the
 * entries below are not cluttered with "\0".
 */
#define GLOB_TEST(expect, pat, str)	expect pat "\0" str "\0"

static char const glob_tests[] __initconst =
	/* Some basic tests */
	GLOB_TEST("1", "a", "a")
	GLOB_TEST("0", "a", "b")
	GLOB_TEST("0", "a", "aa")
	GLOB_TEST("0", "a", "")
	GLOB_TEST("1", "", "")
	GLOB_TEST("0", "", "a")
	/* Simple character class tests */
	GLOB_TEST("1", "[a]", "a")
	GLOB_TEST("0", "[a]", "b")
	GLOB_TEST("0", "[!a]", "a")
	GLOB_TEST("1", "[!a]", "b")
	GLOB_TEST("1", "[ab]", "a")
	GLOB_TEST("1", "[ab]", "b")
	GLOB_TEST("0", "[ab]", "c")
	GLOB_TEST("1", "[!ab]", "c")
	GLOB_TEST("1", "[a-c]", "b")
	GLOB_TEST("0", "[a-c]", "d")
	/* Corner cases in character class parsing */
	GLOB_TEST("1", "[a-c-e-g]", "-")
	GLOB_TEST("0", "[a-c-e-g]", "d")
	GLOB_TEST("1", "[a-c-e-g]", "f")
	GLOB_TEST("1", "[]a-ceg-ik[]", "a")
	GLOB_TEST("1", "[]a-ceg-ik[]", "]")
	GLOB_TEST("1", "[]a-ceg-ik[]", "[")
	GLOB_TEST("1", "[]a-ceg-ik[]", "h")
	GLOB_TEST("0", "[]a-ceg-ik[]", "f")
	GLOB_TEST("0", "[!]a-ceg-ik[]", "h")
	GLOB_TEST("0", "[!]a-ceg-ik[]", "]")
	GLOB_TEST("1", "[!]a-ceg-ik[]", "f")
	/* Simple wild cards */
	GLOB_TEST("1", "?", "a")
	GLOB_TEST("0", "?", "aa")
	GLOB_TEST("0", "??", "a")
	GLOB_TEST("1", "?x?", "axb")
	GLOB_TEST("0", "?x?", "abx")
	GLOB_TEST("0", "?x?", "xab")
	/* Asterisk wild cards (backtracking) */
	GLOB_TEST("0", "*??", "a")
	GLOB_TEST("1", "*??", "ab")
	GLOB_TEST("1", "*??", "abc")
	GLOB_TEST("1", "*??", "abcd")
	GLOB_TEST("0", "??*", "a")
	GLOB_TEST("1", "??*", "ab")
	GLOB_TEST("1", "??*", "abc")
	GLOB_TEST("1", "??*", "abcd")
	GLOB_TEST("0", "?*?", "a")
	GLOB_TEST("1", "?*?", "ab")
	GLOB_TEST("1", "?*?", "abc")
	GLOB_TEST("1", "?*?", "abcd")
	GLOB_TEST("1", "*b", "b")
	GLOB_TEST("1", "*b", "ab")
	GLOB_TEST("0", "*b", "ba")
	GLOB_TEST("1", "*b", "bb")
	GLOB_TEST("1", "*b", "abb")
	GLOB_TEST("1", "*b", "bab")
	GLOB_TEST("1", "*bc", "abbc")
	GLOB_TEST("1", "*bc", "bc")
	GLOB_TEST("1", "*bc", "bbc")
	GLOB_TEST("1", "*bc", "bcbc")
	/* Multiple asterisks (complex backtracking) */
	GLOB_TEST("1", "*ac*", "abacadaeafag")
	GLOB_TEST("1", "*ac*ae*ag*", "abacadaeafag")
	GLOB_TEST("1", "*a*b*[bc]*[ef]*g*", "abacadaeafag")
	GLOB_TEST("0", "*a*b*[ef]*[cd]*g*", "abacadaeafag")
	GLOB_TEST("1", "*abcd*", "abcabcabcabcdefg")
	GLOB_TEST("1", "*ab*cd*", "abcabcabcabcdefg")
	GLOB_TEST("1", "*abcd*abcdef*", "abcabcdabcdeabcdefg")
	GLOB_TEST("0", "*abcd*", "abcabcabcabcefg")
	GLOB_TEST("0", "*ab*cd*", "abcabcabcabcefg");

#undef GLOB_TEST
| 249 | |||
| 250 | static int __init glob_init(void) | ||
| 251 | { | ||
| 252 | unsigned successes = 0; | ||
| 253 | unsigned n = 0; | ||
| 254 | char const *p = glob_tests; | ||
| 255 | static char const message[] __initconst = | ||
| 256 | KERN_INFO "glob: %u self-tests passed, %u failed\n"; | ||
| 257 | |||
| 258 | /* | ||
| 259 | * Tests are jammed together in a string. The first byte is '1' | ||
| 260 | * or '0' to indicate the expected outcome, or '\0' to indicate the | ||
| 261 | * end of the tests. Then come two null-terminated strings: the | ||
| 262 | * pattern and the string to match it against. | ||
| 263 | */ | ||
| 264 | while (*p) { | ||
| 265 | bool expected = *p++ & 1; | ||
| 266 | char const *pat = p; | ||
| 267 | |||
| 268 | p += strlen(p) + 1; | ||
| 269 | successes += test(pat, p, expected); | ||
| 270 | p += strlen(p) + 1; | ||
| 271 | n++; | ||
| 272 | } | ||
| 273 | |||
| 274 | n -= successes; | ||
| 275 | printk(message, successes, n); | ||
| 276 | |||
| 277 | /* What's the errno for "kernel bug detected"? Guess... */ | ||
| 278 | return n ? -ECANCELED : 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* We need a dummy exit function to allow unload */ | ||
| 282 | static void __exit glob_fini(void) { } | ||
| 283 | |||
| 284 | module_init(glob_init); | ||
| 285 | module_exit(glob_fini); | ||
| 286 | |||
| 287 | #endif /* CONFIG_GLOB_SELFTEST */ | ||
