aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-28 19:15:25 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-28 19:15:25 -0400
commit7e0fb73c52c4037b4d5ef9ff56c7296a3151bd92 (patch)
tree9ab023505d388563d937b3c3ac26ef3c2045dba2 /lib
parent4e8440b3b6b801953b2e53c55491cf98fc8f6c01 (diff)
parent4684fe95300c071983f77653e354c040fe80a265 (diff)
Merge branch 'hash' of git://ftp.sciencehorizons.net/linux
Pull string hash improvements from George Spelvin: "This series does several related things: - Makes the dcache hash (fs/namei.c) useful for general kernel use. (Thanks to Bruce for noticing the zero-length corner case) - Converts the string hashes in <linux/sunrpc/svcauth.h> to use the above. - Avoids 64-bit multiplies in hash_64() on 32-bit platforms. Two 32-bit multiplies will do well enough. - Rids the world of the bad hash multipliers in hash_32. This finishes the job started in commit 689de1d6ca95 ("Minimal fix-up of bad hashing behavior of hash_64()") The vast majority of Linux architectures have hardware support for 32x32-bit multiply and so derive no benefit from "simplified" multipliers. The few processors that do not (68000, h8/300 and some models of Microblaze) have arch-specific implementations added. Those patches are last in the series. - Overhauls the dcache hash mixing. The patch in commit 0fed3ac866ea ("namei: Improve hash mixing if CONFIG_DCACHE_WORD_ACCESS") was an off-the-cuff suggestion. Replaced with a much more careful design that's simultaneously faster and better. (My own invention, as there was noting suitable in the literature I could find. Comments welcome!) - Modify the hash_name() loop to skip the initial HASH_MIX(). This would let us salt the hash if we ever wanted to. - Sort out partial_name_hash(). The hash function is declared as using a long state, even though it's truncated to 32 bits at the end and the extra internal state contributes nothing to the result. And some callers do odd things: - fs/hfs/string.c only allocates 32 bits of state - fs/hfsplus/unicode.c uses it to hash 16-bit unicode symbols not bytes - Modify bytemask_from_count to handle inputs of 1..sizeof(long) rather than 0..sizeof(long)-1. This would simplify users other than full_name_hash" Special thanks to Bruce Fields for testing and finding bugs in v1. (I learned some humbling lessons about "obviously correct" code.) On the arch-specific front, the m68k assembly has been tested in a standalone test harness, I've been in contact with the Microblaze maintainers who mostly don't care, as the hardware multiplier is never omitted in real-world applications, and I haven't heard anything from the H8/300 world" * 'hash' of git://ftp.sciencehorizons.net/linux: h8300: Add <asm/hash.h> microblaze: Add <asm/hash.h> m68k: Add <asm/hash.h> <linux/hash.h>: Add support for architecture-specific functions fs/namei.c: Improve dcache hash function Eliminate bad hash multipliers from hash_32() and hash_64() Change hash_64() return value to 32 bits <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string() fs/namei.c: Add hashlen_string() function Pull out string hash to <linux/stringhash.h>
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig.debug11
-rw-r--r--lib/Makefile1
-rw-r--r--lib/test_hash.c250
3 files changed, 262 insertions, 0 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e707ab3e1991..77d7d034bac3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1849,6 +1849,17 @@ config TEST_RHASHTABLE
1849 1849
1850 If unsure, say N. 1850 If unsure, say N.
1851 1851
1852config TEST_HASH
1853 tristate "Perform selftest on hash functions"
1854 default n
1855 help
1856 Enable this option to test the kernel's integer (<linux/hash,h>)
1857 and string (<linux/stringhash.h>) hash functions on boot
1858 (or module load).
1859
1860 This is intended to help people writing architecture-specific
1861 optimized versions. If unsure, say N.
1862
1852endmenu # runtime tests 1863endmenu # runtime tests
1853 1864
1854config PROVIDE_OHCI1394_DMA_INIT 1865config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Makefile b/lib/Makefile
index 42b69185f963..499fb354d627 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
48obj-y += kstrtox.o 48obj-y += kstrtox.o
49obj-$(CONFIG_TEST_BPF) += test_bpf.o 49obj-$(CONFIG_TEST_BPF) += test_bpf.o
50obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o 50obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
51obj-$(CONFIG_TEST_HASH) += test_hash.o
51obj-$(CONFIG_TEST_KASAN) += test_kasan.o 52obj-$(CONFIG_TEST_KASAN) += test_kasan.o
52obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o 53obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
53obj-$(CONFIG_TEST_LKM) += test_module.o 54obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/test_hash.c b/lib/test_hash.c
new file mode 100644
index 000000000000..c9549c8b4909
--- /dev/null
+++ b/lib/test_hash.c
@@ -0,0 +1,250 @@
1/*
2 * Test cases for <linux/hash.h> and <linux/stringhash.h>
3 * This just verifies that various ways of computing a hash
4 * produce the same thing and, for cases where a k-bit hash
5 * value is requested, is of the requested size.
6 *
7 * We fill a buffer with a 255-byte null-terminated string,
8 * and use both full_name_hash() and hashlen_string() to hash the
9 * substrings from i to j, where 0 <= i < j < 256.
10 *
11 * The returned values are used to check that __hash_32() and
12 * __hash_32_generic() compute the same thing. Likewise hash_32()
13 * and hash_64().
14 */
15
16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
17
18#include <linux/compiler.h>
19#include <linux/types.h>
20#include <linux/module.h>
21#include <linux/hash.h>
22#include <linux/stringhash.h>
23#include <linux/printk.h>
24
25/* 32-bit XORSHIFT generator. Seed must not be zero. */
26static u32 __init __attribute_const__
27xorshift(u32 seed)
28{
29 seed ^= seed << 13;
30 seed ^= seed >> 17;
31 seed ^= seed << 5;
32 return seed;
33}
34
35/* Given a non-zero x, returns a non-zero byte. */
36static u8 __init __attribute_const__
37mod255(u32 x)
38{
39 x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */
40 x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x2fd */
41 x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x100 */
42 x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0xff */
43 return x;
44}
45
46/* Fill the buffer with non-zero bytes. */
47static void __init
48fill_buf(char *buf, size_t len, u32 seed)
49{
50 size_t i;
51
52 for (i = 0; i < len; i++) {
53 seed = xorshift(seed);
54 buf[i] = mod255(seed);
55 }
56}
57
58/*
59 * Test the various integer hash functions. h64 (or its low-order bits)
60 * is the integer to hash. hash_or accumulates the OR of the hash values,
61 * which are later checked to see that they cover all the requested bits.
62 *
63 * Because these functions (as opposed to the string hashes) are all
64 * inline, the code being tested is actually in the module, and you can
65 * recompile and re-test the module without rebooting.
66 */
67static bool __init
68test_int_hash(unsigned long long h64, u32 hash_or[2][33])
69{
70 int k;
71 u32 h0 = (u32)h64, h1, h2;
72
73 /* Test __hash32 */
74 hash_or[0][0] |= h1 = __hash_32(h0);
75#ifdef HAVE_ARCH__HASH_32
76 hash_or[1][0] |= h2 = __hash_32_generic(h0);
77#if HAVE_ARCH__HASH_32 == 1
78 if (h1 != h2) {
79 pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
80 h0, h1, h2);
81 return false;
82 }
83#endif
84#endif
85
86 /* Test k = 1..32 bits */
87 for (k = 1; k <= 32; k++) {
88 u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */
89
90 /* Test hash_32 */
91 hash_or[0][k] |= h1 = hash_32(h0, k);
92 if (h1 > m) {
93 pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
94 return false;
95 }
96#ifdef HAVE_ARCH_HASH_32
97 h2 = hash_32_generic(h0, k);
98#if HAVE_ARCH_HASH_32 == 1
99 if (h1 != h2) {
100 pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
101 " = %#x", h0, k, h1, h2);
102 return false;
103 }
104#else
105 if (h2 > m) {
106 pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
107 h0, k, h1, m);
108 return false;
109 }
110#endif
111#endif
112 /* Test hash_64 */
113 hash_or[1][k] |= h1 = hash_64(h64, k);
114 if (h1 > m) {
115 pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
116 return false;
117 }
118#ifdef HAVE_ARCH_HASH_64
119 h2 = hash_64_generic(h64, k);
120#if HAVE_ARCH_HASH_64 == 1
121 if (h1 != h2) {
122 pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
123 "= %#x", h64, k, h1, h2);
124 return false;
125 }
126#else
127 if (h2 > m) {
128 pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
129 h64, k, h1, m);
130 return false;
131 }
132#endif
133#endif
134 }
135
136 (void)h2; /* Suppress unused variable warning */
137 return true;
138}
139
140#define SIZE 256 /* Run time is cubic in SIZE */
141
142static int __init
143test_hash_init(void)
144{
145 char buf[SIZE+1];
146 u32 string_or = 0, hash_or[2][33] = { 0 };
147 unsigned tests = 0;
148 unsigned long long h64 = 0;
149 int i, j;
150
151 fill_buf(buf, SIZE, 1);
152
153 /* Test every possible non-empty substring in the buffer. */
154 for (j = SIZE; j > 0; --j) {
155 buf[j] = '\0';
156
157 for (i = 0; i <= j; i++) {
158 u64 hashlen = hashlen_string(buf+i);
159 u32 h0 = full_name_hash(buf+i, j-i);
160
161 /* Check that hashlen_string gets the length right */
162 if (hashlen_len(hashlen) != j-i) {
163 pr_err("hashlen_string(%d..%d) returned length"
164 " %u, expected %d",
165 i, j, hashlen_len(hashlen), j-i);
166 return -EINVAL;
167 }
168 /* Check that the hashes match */
169 if (hashlen_hash(hashlen) != h0) {
170 pr_err("hashlen_string(%d..%d) = %08x != "
171 "full_name_hash() = %08x",
172 i, j, hashlen_hash(hashlen), h0);
173 return -EINVAL;
174 }
175
176 string_or |= h0;
177 h64 = h64 << 32 | h0; /* For use with hash_64 */
178 if (!test_int_hash(h64, hash_or))
179 return -EINVAL;
180 tests++;
181 } /* i */
182 } /* j */
183
184 /* The OR of all the hash values should cover all the bits */
185 if (~string_or) {
186 pr_err("OR of all string hash results = %#x != %#x",
187 string_or, -1u);
188 return -EINVAL;
189 }
190 if (~hash_or[0][0]) {
191 pr_err("OR of all __hash_32 results = %#x != %#x",
192 hash_or[0][0], -1u);
193 return -EINVAL;
194 }
195#ifdef HAVE_ARCH__HASH_32
196#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */
197 if (~hash_or[1][0]) {
198 pr_err("OR of all __hash_32_generic results = %#x != %#x",
199 hash_or[1][0], -1u);
200 return -EINVAL;
201 }
202#endif
203#endif
204
205 /* Likewise for all the i-bit hash values */
206 for (i = 1; i <= 32; i++) {
207 u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */
208
209 if (hash_or[0][i] != m) {
210 pr_err("OR of all hash_32(%d) results = %#x "
211 "(%#x expected)", i, hash_or[0][i], m);
212 return -EINVAL;
213 }
214 if (hash_or[1][i] != m) {
215 pr_err("OR of all hash_64(%d) results = %#x "
216 "(%#x expected)", i, hash_or[1][i], m);
217 return -EINVAL;
218 }
219 }
220
221 /* Issue notices about skipped tests. */
222#ifndef HAVE_ARCH__HASH_32
223 pr_info("__hash_32() has no arch implementation to test.");
224#elif HAVE_ARCH__HASH_32 != 1
225 pr_info("__hash_32() is arch-specific; not compared to generic.");
226#endif
227#ifndef HAVE_ARCH_HASH_32
228 pr_info("hash_32() has no arch implementation to test.");
229#elif HAVE_ARCH_HASH_32 != 1
230 pr_info("hash_32() is arch-specific; not compared to generic.");
231#endif
232#ifndef HAVE_ARCH_HASH_64
233 pr_info("hash_64() has no arch implementation to test.");
234#elif HAVE_ARCH_HASH_64 != 1
235 pr_info("hash_64() is arch-specific; not compared to generic.");
236#endif
237
238 pr_notice("%u tests passed.", tests);
239
240 return 0;
241}
242
243static void __exit test_hash_exit(void)
244{
245}
246
247module_init(test_hash_init); /* Does everything */
248module_exit(test_hash_exit); /* Does nothing */
249
250MODULE_LICENSE("GPL");