diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-06 14:16:17 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-08 21:08:44 -0500 |
commit | bfcfaa77bdf0f775263e906015982a608df01c76 (patch) | |
tree | 6671137d4af157b851d953b7e2809abbfa809e81 /fs/namei.c | |
parent | 9f8050c4f99789d03ca96d4e625bd6637241828f (diff) |
vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Ok, this is hacky, and only works on little-endian machines with goo
unaligned handling. And even then only with CONFIG_DEBUG_PAGEALLOC
disabled, since it can access up to 7 bytes after the pathname.
But it runs like a bat out of hell.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 122 |
1 files changed, 122 insertions, 0 deletions
diff --git a/fs/namei.c b/fs/namei.c index e2ba62820a0f..378497a744b4 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode) | |||
1374 | return 1; | 1374 | return 1; |
1375 | } | 1375 | } |
1376 | 1376 | ||
1377 | /* | ||
1378 | * We can do the critical dentry name comparison and hashing | ||
1379 | * operations one word at a time, but we are limited to: | ||
1380 | * | ||
1381 | * - Architectures with fast unaligned word accesses. We could | ||
1382 | * do a "get_unaligned()" if this helps and is sufficiently | ||
1383 | * fast. | ||
1384 | * | ||
1385 | * - Little-endian machines (so that we can generate the mask | ||
1386 | * of low bytes efficiently). Again, we *could* do a byte | ||
1387 | * swapping load on big-endian architectures if that is not | ||
1388 | * expensive enough to make the optimization worthless. | ||
1389 | * | ||
1390 | * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we | ||
1391 | * do not trap on the (extremely unlikely) case of a page | ||
1392 | * crossing operation. | ||
1393 | * | ||
1394 | * - Furthermore, we need an efficient 64-bit compile for the | ||
1395 | * 64-bit case in order to generate the "number of bytes in | ||
1396 | * the final mask". Again, that could be replaced with a | ||
1397 | * efficient population count instruction or similar. | ||
1398 | */ | ||
1399 | #ifdef CONFIG_DCACHE_WORD_ACCESS | ||
1400 | |||
1401 | #ifdef CONFIG_64BIT | ||
1402 | |||
1403 | /* | ||
1404 | * Jan Achrenius on G+: microoptimized version of | ||
1405 | * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" | ||
1406 | * that works for the bytemasks without having to | ||
1407 | * mask them first. | ||
1408 | */ | ||
1409 | static inline long count_masked_bytes(unsigned long mask) | ||
1410 | { | ||
1411 | return mask*0x0001020304050608 >> 56; | ||
1412 | } | ||
1413 | |||
1414 | static inline unsigned int fold_hash(unsigned long hash) | ||
1415 | { | ||
1416 | hash += hash >> (8*sizeof(int)); | ||
1417 | return hash; | ||
1418 | } | ||
1419 | |||
1420 | #else /* 32-bit case */ | ||
1421 | |||
1422 | /* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ | ||
1423 | static inline long count_masked_bytes(long mask) | ||
1424 | { | ||
1425 | /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ | ||
1426 | long a = (0x0ff0001+mask) >> 23; | ||
1427 | /* Fix the 1 for 00 case */ | ||
1428 | return a & mask; | ||
1429 | } | ||
1430 | |||
1431 | #define fold_hash(x) (x) | ||
1432 | |||
1433 | #endif | ||
1434 | |||
1435 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) | ||
1436 | { | ||
1437 | unsigned long a, mask; | ||
1438 | unsigned long hash = 0; | ||
1439 | |||
1440 | for (;;) { | ||
1441 | a = *(unsigned long *)name; | ||
1442 | hash *= 9; | ||
1443 | if (len < sizeof(unsigned long)) | ||
1444 | break; | ||
1445 | hash += a; | ||
1446 | name += sizeof(unsigned long); | ||
1447 | len -= sizeof(unsigned long); | ||
1448 | if (!len) | ||
1449 | goto done; | ||
1450 | } | ||
1451 | mask = ~(~0ul << len*8); | ||
1452 | hash += mask & a; | ||
1453 | done: | ||
1454 | return fold_hash(hash); | ||
1455 | } | ||
1456 | EXPORT_SYMBOL(full_name_hash); | ||
1457 | |||
1458 | #define ONEBYTES 0x0101010101010101ul | ||
1459 | #define SLASHBYTES 0x2f2f2f2f2f2f2f2ful | ||
1460 | #define HIGHBITS 0x8080808080808080ul | ||
1461 | |||
1462 | /* Return the high bit set in the first byte that is a zero */ | ||
1463 | static inline unsigned long has_zero(unsigned long a) | ||
1464 | { | ||
1465 | return ((a - ONEBYTES) & ~a) & HIGHBITS; | ||
1466 | } | ||
1467 | |||
1468 | /* | ||
1469 | * Calculate the length and hash of the path component, and | ||
1470 | * return the length of the component; | ||
1471 | */ | ||
1472 | static inline unsigned long hash_name(const char *name, unsigned int *hashp) | ||
1473 | { | ||
1474 | unsigned long a, mask, hash, len; | ||
1475 | |||
1476 | hash = a = 0; | ||
1477 | len = -sizeof(unsigned long); | ||
1478 | do { | ||
1479 | hash = (hash + a) * 9; | ||
1480 | len += sizeof(unsigned long); | ||
1481 | a = *(unsigned long *)(name+len); | ||
1482 | /* Do we have any NUL or '/' bytes in this word? */ | ||
1483 | mask = has_zero(a) | has_zero(a ^ SLASHBYTES); | ||
1484 | } while (!mask); | ||
1485 | |||
1486 | /* The mask *below* the first high bit set */ | ||
1487 | mask = (mask - 1) & ~mask; | ||
1488 | mask >>= 7; | ||
1489 | hash += a & mask; | ||
1490 | *hashp = fold_hash(hash); | ||
1491 | |||
1492 | return len + count_masked_bytes(mask); | ||
1493 | } | ||
1494 | |||
1495 | #else | ||
1496 | |||
1377 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) | 1497 | unsigned int full_name_hash(const unsigned char *name, unsigned int len) |
1378 | { | 1498 | { |
1379 | unsigned long hash = init_name_hash(); | 1499 | unsigned long hash = init_name_hash(); |
@@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) | |||
1402 | return len; | 1522 | return len; |
1403 | } | 1523 | } |
1404 | 1524 | ||
1525 | #endif | ||
1526 | |||
1405 | /* | 1527 | /* |
1406 | * Name resolution. | 1528 | * Name resolution. |
1407 | * This is the basic name resolution function, turning a pathname into | 1529 | * This is the basic name resolution function, turning a pathname into |