diff options
Diffstat (limited to 'arch/alpha/lib/strlen.S')
-rw-r--r-- | arch/alpha/lib/strlen.S | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S new file mode 100644 index 000000000000..fe63353de152 --- /dev/null +++ b/arch/alpha/lib/strlen.S | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) | ||
3 | * | ||
4 | * Finds length of a 0-terminated string. Optimized for the | ||
5 | * Alpha architecture: | ||
6 | * | ||
7 | * - memory accessed as aligned quadwords only | ||
8 | * - uses bcmpge to compare 8 bytes in parallel | ||
9 | * - does binary search to find 0 byte in last | ||
10 | * quadword (HAKMEM needed 12 instructions to | ||
11 | * do this instead of the 9 instructions that | ||
12 | * binary search needs). | ||
13 | */ | ||
14 | |||
15 | .set noreorder | ||
16 | .set noat | ||
17 | |||
18 | .align 3 | ||
19 | |||
20 | .globl strlen | ||
21 | .ent strlen | ||
22 | |||
23 | strlen: | ||
24 | ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) | ||
25 | lda $2, -1($31) | ||
26 | insqh $2, $16, $2 | ||
27 | andnot $16, 7, $0 | ||
28 | or $2, $1, $1 | ||
29 | cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 | ||
30 | bne $2, found | ||
31 | |||
32 | loop: ldq $1, 8($0) | ||
33 | addq $0, 8, $0 # addr += 8 | ||
34 | nop # helps dual issue last two insns | ||
35 | cmpbge $31, $1, $2 | ||
36 | beq $2, loop | ||
37 | |||
38 | found: blbs $2, done # make aligned case fast | ||
39 | negq $2, $3 | ||
40 | and $2, $3, $2 | ||
41 | |||
42 | and $2, 0x0f, $1 | ||
43 | addq $0, 4, $3 | ||
44 | cmoveq $1, $3, $0 | ||
45 | |||
46 | and $2, 0x33, $1 | ||
47 | addq $0, 2, $3 | ||
48 | cmoveq $1, $3, $0 | ||
49 | |||
50 | and $2, 0x55, $1 | ||
51 | addq $0, 1, $3 | ||
52 | cmoveq $1, $3, $0 | ||
53 | |||
54 | done: subq $0, $16, $0 | ||
55 | ret $31, ($26) | ||
56 | |||
57 | .end strlen | ||