aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-06 14:16:17 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-08 21:08:44 -0500
commitbfcfaa77bdf0f775263e906015982a608df01c76 (patch)
tree6671137d4af157b851d953b7e2809abbfa809e81
parent9f8050c4f99789d03ca96d4e625bd6637241828f (diff)
vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Ok, this is hacky, and only works on little-endian machines with goo unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC disabled, since it can access up to 7 bytes after the pathname. But it runs like a bat out of hell. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/dcache.c23
-rw-r--r--fs/namei.c122
4 files changed, 150 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e189fa..09675d3e0ac3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -82,6 +82,7 @@ config X86
82 select CLKEVT_I8253 82 select CLKEVT_I8253
83 select ARCH_HAVE_NMI_SAFE_CMPXCHG 83 select ARCH_HAVE_NMI_SAFE_CMPXCHG
84 select GENERIC_IOMAP 84 select GENERIC_IOMAP
85 select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
85 86
86config INSTRUCTION_DECODER 87config INSTRUCTION_DECODER
87 def_bool (KPROBES || PERF_EVENTS) 88 def_bool (KPROBES || PERF_EVENTS)
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9e..aa195265362f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
4 4
5menu "File systems" 5menu "File systems"
6 6
7# Use unaligned word dcache accesses
8config DCACHE_WORD_ACCESS
9 bool
10
7if BLOCK 11if BLOCK
8 12
9source "fs/ext2/Kconfig" 13source "fs/ext2/Kconfig"
diff --git a/fs/dcache.c b/fs/dcache.c
index bcbdb33fcc20..ffd47a16d870 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
144static inline int dentry_cmp(const unsigned char *cs, size_t scount, 144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount) 145 const unsigned char *ct, size_t tcount)
146{ 146{
147#ifdef CONFIG_DCACHE_WORD_ACCESS
148 unsigned long a,b,mask;
149
150 if (unlikely(scount != tcount))
151 return 1;
152
153 for (;;) {
154 a = *(unsigned long *)cs;
155 b = *(unsigned long *)ct;
156 if (tcount < sizeof(unsigned long))
157 break;
158 if (unlikely(a != b))
159 return 1;
160 cs += sizeof(unsigned long);
161 ct += sizeof(unsigned long);
162 tcount -= sizeof(unsigned long);
163 if (!tcount)
164 return 0;
165 }
166 mask = ~(~0ul << tcount*8);
167 return unlikely(!!((a ^ b) & mask));
168#else
147 if (scount != tcount) 169 if (scount != tcount)
148 return 1; 170 return 1;
149 171
@@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
155 tcount--; 177 tcount--;
156 } while (tcount); 178 } while (tcount);
157 return 0; 179 return 0;
180#endif
158} 181}
159 182
160static void __d_free(struct rcu_head *head) 183static void __d_free(struct rcu_head *head)
diff --git a/fs/namei.c b/fs/namei.c
index e2ba62820a0f..378497a744b4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
1374 return 1; 1374 return 1;
1375} 1375}
1376 1376
1377/*
1378 * We can do the critical dentry name comparison and hashing
1379 * operations one word at a time, but we are limited to:
1380 *
1381 * - Architectures with fast unaligned word accesses. We could
1382 * do a "get_unaligned()" if this helps and is sufficiently
1383 * fast.
1384 *
1385 * - Little-endian machines (so that we can generate the mask
1386 * of low bytes efficiently). Again, we *could* do a byte
1387 * swapping load on big-endian architectures if that is not
1388 * expensive enough to make the optimization worthless.
1389 *
1390 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1391 * do not trap on the (extremely unlikely) case of a page
1392 * crossing operation.
1393 *
1394 * - Furthermore, we need an efficient 64-bit compile for the
1395 * 64-bit case in order to generate the "number of bytes in
1396 * the final mask". Again, that could be replaced with a
1397 * efficient population count instruction or similar.
1398 */
1399#ifdef CONFIG_DCACHE_WORD_ACCESS
1400
1401#ifdef CONFIG_64BIT
1402
1403/*
1404 * Jan Achrenius on G+: microoptimized version of
1405 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
1406 * that works for the bytemasks without having to
1407 * mask them first.
1408 */
1409static inline long count_masked_bytes(unsigned long mask)
1410{
1411 return mask*0x0001020304050608 >> 56;
1412}
1413
1414static inline unsigned int fold_hash(unsigned long hash)
1415{
1416 hash += hash >> (8*sizeof(int));
1417 return hash;
1418}
1419
1420#else /* 32-bit case */
1421
1422/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
1423static inline long count_masked_bytes(long mask)
1424{
1425 /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
1426 long a = (0x0ff0001+mask) >> 23;
1427 /* Fix the 1 for 00 case */
1428 return a & mask;
1429}
1430
1431#define fold_hash(x) (x)
1432
1433#endif
1434
1435unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1436{
1437 unsigned long a, mask;
1438 unsigned long hash = 0;
1439
1440 for (;;) {
1441 a = *(unsigned long *)name;
1442 hash *= 9;
1443 if (len < sizeof(unsigned long))
1444 break;
1445 hash += a;
1446 name += sizeof(unsigned long);
1447 len -= sizeof(unsigned long);
1448 if (!len)
1449 goto done;
1450 }
1451 mask = ~(~0ul << len*8);
1452 hash += mask & a;
1453done:
1454 return fold_hash(hash);
1455}
1456EXPORT_SYMBOL(full_name_hash);
1457
1458#define ONEBYTES 0x0101010101010101ul
1459#define SLASHBYTES 0x2f2f2f2f2f2f2f2ful
1460#define HIGHBITS 0x8080808080808080ul
1461
1462/* Return the high bit set in the first byte that is a zero */
1463static inline unsigned long has_zero(unsigned long a)
1464{
1465 return ((a - ONEBYTES) & ~a) & HIGHBITS;
1466}
1467
1468/*
1469 * Calculate the length and hash of the path component, and
1470 * return the length of the component;
1471 */
1472static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1473{
1474 unsigned long a, mask, hash, len;
1475
1476 hash = a = 0;
1477 len = -sizeof(unsigned long);
1478 do {
1479 hash = (hash + a) * 9;
1480 len += sizeof(unsigned long);
1481 a = *(unsigned long *)(name+len);
1482 /* Do we have any NUL or '/' bytes in this word? */
1483 mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
1484 } while (!mask);
1485
1486 /* The mask *below* the first high bit set */
1487 mask = (mask - 1) & ~mask;
1488 mask >>= 7;
1489 hash += a & mask;
1490 *hashp = fold_hash(hash);
1491
1492 return len + count_masked_bytes(mask);
1493}
1494
1495#else
1496
1377unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1497unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1378{ 1498{
1379 unsigned long hash = init_name_hash(); 1499 unsigned long hash = init_name_hash();
@@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1402 return len; 1522 return len;
1403} 1523}
1404 1524
1525#endif
1526
1405/* 1527/*
1406 * Name resolution. 1528 * Name resolution.
1407 * This is the basic name resolution function, turning a pathname into 1529 * This is the basic name resolution function, turning a pathname into