diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-06 14:16:17 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-08 21:08:44 -0500 |
commit | bfcfaa77bdf0f775263e906015982a608df01c76 (patch) | |
tree | 6671137d4af157b851d953b7e2809abbfa809e81 | |
parent | 9f8050c4f99789d03ca96d4e625bd6637241828f (diff) |
vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Ok, this is hacky, and only works on little-endian machines with goo
unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC
disabled, since it can access up to 7 bytes after the pathname.
But it runs like a bat out of hell.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | fs/Kconfig | 4 | ||||
-rw-r--r-- | fs/dcache.c | 23 | ||||
-rw-r--r-- | fs/namei.c | 122 |
4 files changed, 150 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bed94e189fa..09675d3e0ac3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -82,6 +82,7 @@ config X86 | |||
82 | select CLKEVT_I8253 | 82 | select CLKEVT_I8253 |
83 | select ARCH_HAVE_NMI_SAFE_CMPXCHG | 83 | select ARCH_HAVE_NMI_SAFE_CMPXCHG |
84 | select GENERIC_IOMAP | 84 | select GENERIC_IOMAP |
85 | select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC | ||
85 | 86 | ||
86 | config INSTRUCTION_DECODER | 87 | config INSTRUCTION_DECODER |
87 | def_bool (KPROBES || PERF_EVENTS) | 88 | def_bool (KPROBES || PERF_EVENTS) |
diff --git a/fs/Kconfig b/fs/Kconfig index d621f02a3f9e..aa195265362f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -4,6 +4,10 @@ | |||
4 | 4 | ||
5 | menu "File systems" | 5 | menu "File systems" |
6 | 6 | ||
7 | # Use unaligned word dcache accesses | ||
8 | config DCACHE_WORD_ACCESS | ||
9 | bool | ||
10 | |||
7 | if BLOCK | 11 | if BLOCK |
8 | 12 | ||
9 | source "fs/ext2/Kconfig" | 13 | source "fs/ext2/Kconfig" |
diff --git a/fs/dcache.c b/fs/dcache.c index bcbdb33fcc20..ffd47a16d870 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | |||
144 | static inline int dentry_cmp(const unsigned char *cs, size_t scount, | 144 | static inline int dentry_cmp(const unsigned char *cs, size_t scount, |
145 | const unsigned char *ct, size_t tcount) | 145 | const unsigned char *ct, size_t tcount) |
146 | { | 146 | { |
147 | #ifdef CONFIG_DCACHE_WORD_ACCESS | ||
148 | unsigned long a,b,mask; | ||
149 | |||
150 | if (unlikely(scount != tcount)) | ||
151 | return 1; | ||
152 | |||
153 | for (;;) { | ||
154 | a = *(unsigned long *)cs; | ||
155 | b = *(unsigned long *)ct; | ||
156 | if (tcount < sizeof(unsigned long)) | ||
157 | break; | ||
158 | if (unlikely(a != b)) | ||
159 | return 1; | ||
160 | cs += sizeof(unsigned long); | ||
161 | ct += sizeof(unsigned long); | ||
162 | tcount -= sizeof(unsigned long); | ||
163 | if (!tcount) | ||
164 | return 0; | ||
165 | } | ||
166 | mask = ~(~0ul << tcount*8); | ||
167 | return unlikely(!!((a ^ b) & mask)); | ||
168 | #else | ||
147 | if (scount != tcount) | 169 | if (scount != tcount) |
148 | return 1; | 170 | return 1; |
149 | 171 | ||
@@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, | |||
155 | tcount--; | 177 | tcount--; |
156 | } while (tcount); | 178 | } while (tcount); |
157 | return 0; | 179 | return 0; |
180 | #endif | ||
158 | } | 181 | } |
159 | 182 | ||
160 | static void __d_free(struct rcu_head *head) | 183 | static void __d_free(struct rcu_head *head) |
diff --git a/fs/namei.c b/fs/namei.c index e2ba62820a0f..378497a744b4 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode) | |||
1374 | return 1; | 1374 | return 1; |
1375 | } | 1375 | } |
1376 | 1376 | ||
1377 | /* | ||
1378 | * We can do the critical dentry name comparison and hashing | ||
1379 | * operations one word at a time, but we are limited to: | ||
1380 | * | ||
1381 | * - Architectures with fast unaligned word accesses. We could | ||
1382 | * do a "get_unaligned()" if this helps and is sufficiently | ||
1383 | * fast. | ||
1384 | * | ||
1385 | * - Little-endian machines (so that we can generate the mask | ||
1386 | * of low bytes efficiently). Again, we *could* do a byte | ||
1387 | * swapping load on big-endian architectures if that is not | ||
1388 | * expensive enough to make the optimization worthless. | ||
1389 | * | ||
1390 | * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we | ||
1391 | * do not trap on the (extremely unlikely) case of a page | ||
1392 | * crossing operation. | ||
1393 | * | ||
1394 | * - Furthermore, we need an efficient 64-bit compile for the | ||
1395 | * 64-bit case in order to generate the "number of bytes in | ||
1396 | * the final mask". Again, that could be replaced with a | ||
1397 | * efficient population count instruction or similar. | ||
1398 | */ | ||
1399 | #ifdef CONFIG_DCACHE_WORD_ACCESS | ||
1400 | |||
1401 | #ifdef CONFIG_64BIT | ||
1402 | |||
1403 | /* | ||
1404 | * Jan Achrenius on G+: microoptimized version of | ||
1405 | * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" | ||
1406 | * that works for the bytemasks without having to | ||
1407 | * mask them first. | ||
1408 | */ | ||
1409 | static inline long count_masked_bytes(unsigned long mask) | ||
1410 | { | ||
1411 | return mask*0x0001020304050608 >> 56; | ||
1412 | } | ||
1413 | |||
1414 | static inline unsigned int fold_hash(unsigned long hash) | ||
1415 | { | ||
1416 | hash += hash >> (8*sizeof(int)); | ||
1417 | return hash; | ||
1418 | } | ||
1419 | |||
1420 | #else /* 32-bit case */ | ||
1421 | |||
1422 | /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ | ||
1423 | static inline long count_masked_bytes(long mask) | ||
1424 | { | ||
1425 | /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ | ||
1426 | long a = (0x0ff0001+mask) >> 23; | ||
1427 | /* Fix the 1 for 00 case */ | ||
1428 | return a & mask; | ||
1429 | } | ||
1430 | |||
1431 | #define fold_hash(x) (x) | ||
1432 | |||
1433 | #endif | ||
1434 | |||
1435 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) | ||
1436 | { | ||
1437 | unsigned long a, mask; | ||
1438 | unsigned long hash = 0; | ||
1439 | |||
1440 | for (;;) { | ||
1441 | a = *(unsigned long *)name; | ||
1442 | hash *= 9; | ||
1443 | if (len < sizeof(unsigned long)) | ||
1444 | break; | ||
1445 | hash += a; | ||
1446 | name += sizeof(unsigned long); | ||
1447 | len -= sizeof(unsigned long); | ||
1448 | if (!len) | ||
1449 | goto done; | ||
1450 | } | ||
1451 | mask = ~(~0ul << len*8); | ||
1452 | hash += mask & a; | ||
1453 | done: | ||
1454 | return fold_hash(hash); | ||
1455 | } | ||
1456 | EXPORT_SYMBOL(full_name_hash); | ||
1457 | |||
1458 | #define ONEBYTES 0x0101010101010101ul | ||
1459 | #define SLASHBYTES 0x2f2f2f2f2f2f2f2ful | ||
1460 | #define HIGHBITS 0x8080808080808080ul | ||
1461 | |||
1462 | /* Return the high bit set in the first byte that is a zero */ | ||
1463 | static inline unsigned long has_zero(unsigned long a) | ||
1464 | { | ||
1465 | return ((a - ONEBYTES) & ~a) & HIGHBITS; | ||
1466 | } | ||
1467 | |||
1468 | /* | ||
1469 | * Calculate the length and hash of the path component, and | ||
1470 | * return the length of the component; | ||
1471 | */ | ||
1472 | static inline unsigned long hash_name(const char *name, unsigned int *hashp) | ||
1473 | { | ||
1474 | unsigned long a, mask, hash, len; | ||
1475 | |||
1476 | hash = a = 0; | ||
1477 | len = -sizeof(unsigned long); | ||
1478 | do { | ||
1479 | hash = (hash + a) * 9; | ||
1480 | len += sizeof(unsigned long); | ||
1481 | a = *(unsigned long *)(name+len); | ||
1482 | /* Do we have any NUL or '/' bytes in this word? */ | ||
1483 | mask = has_zero(a) | has_zero(a ^ SLASHBYTES); | ||
1484 | } while (!mask); | ||
1485 | |||
1486 | /* The mask *below* the first high bit set */ | ||
1487 | mask = (mask - 1) & ~mask; | ||
1488 | mask >>= 7; | ||
1489 | hash += a & mask; | ||
1490 | *hashp = fold_hash(hash); | ||
1491 | |||
1492 | return len + count_masked_bytes(mask); | ||
1493 | } | ||
1494 | |||
1495 | #else | ||
1496 | |||
1377 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) | 1497 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) |
1378 | { | 1498 | { |
1379 | unsigned long hash = init_name_hash(); | 1499 | unsigned long hash = init_name_hash(); |
@@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) | |||
1402 | return len; | 1522 | return len; |
1403 | } | 1523 | } |
1404 | 1524 | ||
1525 | #endif | ||
1526 | |||
1405 | /* | 1527 | /* |
1406 | * Name resolution. | 1528 | * Name resolution. |
1407 | * This is the basic name resolution function, turning a pathname into | 1529 | * This is the basic name resolution function, turning a pathname into |