diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2013-08-01 15:52:17 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2013-08-01 16:23:12 -0400 |
commit | c53c70a90fdce3e7a53a0412abf7cc2b2a645988 (patch) | |
tree | 0b1ec6d04be95ac07563ba518047be73973d25d8 /arch/tile/lib/string-endian.h | |
parent | dd78bc11fb2050b6a3990d0421feca4c68ca4335 (diff) |
tile: optimize and clean up string functions
This change cleans up the string code in a number of ways:
- For memcpy(), fix bug in prefetch and increase distance to 3 lines;
  optimize for unaligned data; do all loads before wh64 to make memcpy
  safe for forward-overlapping calls; etc. Performance is improved.
- Use new copy_byte() function on tilegx to spread a single byte value
  out into a full word using the shufflebytes instruction.
- Clean up header include ordering to be more canonical, and remove
  spurious #undefs of function names.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib/string-endian.h')
-rw-r--r-- | arch/tile/lib/string-endian.h | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h index c0eed7ce69c3..2e49cbfe9371 100644 --- a/arch/tile/lib/string-endian.h +++ b/arch/tile/lib/string-endian.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. | 2 | * Copyright 2013 Tilera Corporation. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -31,3 +31,14 @@ | |||
31 | #define CFZ(x) __insn_clz(x) | 31 | #define CFZ(x) __insn_clz(x) |
32 | #define REVCZ(x) __insn_ctz(x) | 32 | #define REVCZ(x) __insn_ctz(x) |
33 | #endif | 33 | #endif |
34 | |||
35 | /* | ||
36 | * Create eight copies of the byte in a uint64_t. Byte Shuffle uses | ||
37 | * the bytes of srcB as the index into the dest vector to select a | ||
38 | * byte. With all indices of zero, the first byte is copied into all | ||
39 | * the other bytes. | ||
40 | */ | ||
41 | static inline uint64_t copy_byte(uint8_t byte) | ||
42 | { | ||
43 | return __insn_shufflebytes(byte, 0, 0); | ||
44 | } | ||