1 files changed, 234 insertions, 0 deletions
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
new file mode 100644
index 000000000000..42f828b06c59
--- /dev/null
+++ b/arch/arm64/lib/strcmp.S
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * Compare two strings.
+ *
+ * Parameters:
+ *      x0 - const string 1 pointer
+ *      x1 - const string 2 pointer
+ * Returns:
+ *      x0 - an integer less than, equal to, or greater than zero
+ *      if s1 is found, respectively, to be less than, to match,
+ *      or to be greater than s2.
+ */
+#define REP8_01 0x0101010101010101      /* 0x01 in each of the 8 bytes; loaded into zeroones.  */
+#define REP8_7f 0x7f7f7f7f7f7f7f7f      /* 0x7f in each of the 8 bytes; ORed in for NUL detection.  */
+#define REP8_80 0x8080808080808080      /* 0x80 in each of the 8 bytes; not referenced in the code visible here.  */
+/* Parameters and result.  */
+src1            .req    x0      /* First string pointer.  */
+src2            .req    x1      /* Second string pointer.  */
+result          .req    x0      /* Return value; shares x0 with src1.  */
+/* Internal variables.  */
+data1           .req    x2      /* Bytes loaded from src1.  */
+data1w          .req    w2      /* 32-bit view of data1, used by the byte loads.  */
+data2           .req    x3      /* Bytes loaded from src2.  */
+data2w          .req    w3      /* 32-bit view of data2, used by the byte loads.  */
+has_nul         .req    x4      /* Non-zero if data1 contains a NUL byte.  */
+diff            .req    x5      /* data1 XOR data2.  */
+syndrome        .req    x6      /* diff OR has_nul.  */
+tmp1            .req    x7      /* Scratch.  */
+tmp2            .req    x8      /* Scratch.  */
+tmp3            .req    x9      /* Scratch; holds tmp1 - tmp2 in the misaligned path.  */
+zeroones        .req    x10     /* Holds REP8_01 for the whole routine.  */
+pos             .req    x11     /* Byte count, negative load offset or CLZ result, by phase.  */
+ENTRY(strcmp)
+        eor     tmp1, src1, src2        /* Bits in which the two pointers differ.  */
+        mov     zeroones, #REP8_01
+        tst     tmp1, #7                /* Do the low three address bits match?  */
+        b.ne    .Lmisaligned8           /* No: the strings have different alignment.  */
+        ands    tmp1, src1, #7          /* tmp1 = offset of src1 within its 8-byte word.  */
+        b.ne    .Lmutual_align          /* Same alignment, but not on a boundary.  */
+        /*
+        * Both pointers are 8-byte aligned here: compare 8 bytes per
+        * iteration.
+        *
+        * NUL detection works on the principle that (X - 1) & (~X) & 0x80
+        * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+        * can be done in parallel across the entire word.
+        */
+.Lloop_aligned:
+        ldr     data1, [src1], #8       /* Load 8 bytes, then advance the pointer.  */
+        ldr     data2, [src2], #8
+.Lstart_realigned:
+        sub     tmp1, data1, zeroones   /* The (X - 1) term, using REP8_01.  */
+        orr     tmp2, data1, #REP8_7f   /* The (X | 0x7f) term.  */
+        eor     diff, data1, data2      /* Non-zero if differences found.  */
+        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
+        orr     syndrome, diff, has_nul
+        cbz     syndrome, .Lloop_aligned        /* No difference and no NUL: next 8 bytes.  */
+        b       .Lcal_cmpresult
+.Lmutual_align:
+        /*
+        * Sources are mutually aligned, but are not currently at an
+        * alignment boundary.  Round down the addresses and then mask off
+        * the bytes that precede the start point.
+        */
+        bic     src1, src1, #7
+        bic     src2, src2, #7
+        lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
+        ldr     data1, [src1], #8
+        neg     tmp1, tmp1              /* Bits to alignment -64.  */
+        ldr     data2, [src2], #8
+        mov     tmp2, #~0
+        /* Big-endian.  Early bytes are at MSB.  */
+CPU_BE( lsl     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
+        /* Little-endian.  Early bytes are at LSB.  */
+CPU_LE( lsr     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
+        orr     data1, data1, tmp2      /* Set the bytes before the start point to all-ones  */
+        orr     data2, data2, tmp2      /* in both words, so they match and are not NUL.  */
+        b       .Lstart_realigned
+.Lmisaligned8:
+        /*
+        * The two strings have different alignments.  Work out how many
+        * bytes each one is from its next 8-byte boundary and compare the
+        * larger of the two counts one byte at a time first.
+        * After this process, one string's address will be aligned.
+        */
+        and     tmp1, src1, #7
+        neg     tmp1, tmp1
+        add     tmp1, tmp1, #8          /* tmp1 = 8 - (src1 & 7).  */
+        and     tmp2, src2, #7
+        neg     tmp2, tmp2
+        add     tmp2, tmp2, #8          /* tmp2 = 8 - (src2 & 7).  */
+        subs    tmp3, tmp1, tmp2        /* tmp3 = tmp1 - tmp2; flags feed the csel.  */
+        csel    pos, tmp1, tmp2, hi /* Choose the maximum.  */
+.Ltinycmp:
+        ldrb    data1w, [src1], #1
+        ldrb    data2w, [src2], #1
+        subs    pos, pos, #1        /* Z is set once the byte count is used up.  */
+        ccmp    data1w, #1, #0, ne  /* Bytes left: C = (data1w >= 1), i.e. not NUL.  Else NZCV = 0b0000.  */
+        ccmp    data1w, data2w, #0, cs  /* C set: Z = (data1w == data2w).  Else NZCV = 0b0000.  */
+        b.eq    .Ltinycmp           /* Loop while bytes remain and the pair is equal and non-NUL.  */
+        cbnz    pos, 1f /* Count not used up: stopped on a NUL or an unequal pair.  */
+        cmp     data1w, #1          /* Count used up: re-test the last byte pair.  */
+        ccmp    data1w, data2w, #0, cs
+        b.eq    .Lstart_align /* The last bytes are equal and not NUL.  */
+1:
+        sub     result, data1, data2    /* Difference of the two bytes loaded by ldrb.  */
+        ret
+.Lstart_align:
+        ands    xzr, src1, #7           /* Is src1 the pointer that is now aligned?  */
+        b.eq    .Lrecal_offset
+        /*
+        * src1 is not aligned here.  Add tmp3 (tmp1 - tmp2 from above) to
+        * both pointers so that str1 becomes aligned; the leading bytes
+        * this exposes are processed again by the word compare below.
+        */
+        add     src1, src1, tmp3
+        add     src2, src2, tmp3
+        /* Load 8 bytes from aligned str1 and non-aligned str2.  */
+        ldr     data1, [src1], #8
+        ldr     data2, [src2], #8
+        sub     tmp1, data1, zeroones
+        orr     tmp2, data1, #REP8_7f
+        bic     has_nul, tmp1, tmp2
+        eor     diff, data1, data2 /* Non-zero if differences found.  */
+        orr     syndrome, diff, has_nul
+        cbnz    syndrome, .Lcal_cmpresult
+        /* How far is the current str2 from the alignment boundary?  */
+        and     tmp3, tmp3, #7
+.Lrecal_offset:
+        neg     pos, tmp3               /* pos = -tmp3, the offset used by the loads below.  */
+.Lloopcmp_proc:
+        /*
+        * Each iteration is split into two parts.  First, step src2 back
+        * to an alignment boundary (pos is the offset), load eight bytes
+        * from that src2 alignment boundary, and compare them with the
+        * corresponding bytes from src1.
+        * If all 8 bytes are equal, start the second part's comparison.
+        * Otherwise finish the comparison.
+        * This special handling is intended to guarantee that all accesses
+        * stay within the thread/task space and avoid out-of-range access.
+        */
+        ldr     data1, [src1,pos]
+        ldr     data2, [src2,pos]
+        sub     tmp1, data1, zeroones
+        orr     tmp2, data1, #REP8_7f
+        bic     has_nul, tmp1, tmp2
+        eor     diff, data1, data2  /* Non-zero if differences found.  */
+        orr     syndrome, diff, has_nul
+        cbnz    syndrome, .Lcal_cmpresult
+        /* The second part: the next eight bytes at src1 and src2.  */
+        ldr     data1, [src1], #8
+        ldr     data2, [src2], #8
+        sub     tmp1, data1, zeroones
+        orr     tmp2, data1, #REP8_7f
+        bic     has_nul, tmp1, tmp2
+        eor     diff, data1, data2  /* Non-zero if differences found.  */
+        orr     syndrome, diff, has_nul
+        cbz     syndrome, .Lloopcmp_proc
+.Lcal_cmpresult:
+        /*
+        * On little-endian, reverse the byte order so the data is laid out
+        * as on big-endian; CLZ can then count the leading zero bits of
+        * the syndrome to find the first difference or NUL.
+        */
+CPU_LE( rev     syndrome, syndrome )
+CPU_LE( rev     data1, data1 )
+CPU_LE( rev     data2, data2 )
+        /*
+        * For big-endian we cannot use the trick with the syndrome value
+        * as carry-propagation can corrupt the upper bits if the trailing
+        * bytes in the string contain 0x01.
+        * However, if there is no NUL byte in the dword, we can generate
+        * the result directly.  We cannot just subtract the bytes as the
+        * MSB might be significant.
+        */
+CPU_BE( cbnz    has_nul, 1f )           /* NUL flagged: recompute the syndrome below.  */
+CPU_BE( cmp     data1, data2 )
+CPU_BE( cset    result, ne )            /* result = 1 if the dwords differ, else 0.  */
+CPU_BE( cneg    result, result, lo )    /* Negate if data1 is lower (unsigned).  */
+CPU_BE( ret )
+CPU_BE( 1: )
+        /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+CPU_BE( rev     tmp3, data1 )
+CPU_BE( sub     tmp1, tmp3, zeroones )
+CPU_BE( orr     tmp2, tmp3, #REP8_7f )
+CPU_BE( bic     has_nul, tmp1, tmp2 )
+CPU_BE( rev     has_nul, has_nul )      /* Back to the original byte order.  */
+CPU_BE( orr     syndrome, diff, has_nul )
+        clz     pos, syndrome           /* pos = number of leading zero bits in syndrome.  */
+        /*
+        * The MS-non-zero bit of the syndrome marks either the first bit
+        * that is different, or the top bit of the first zero byte.
+        * Shifting left now will bring the critical information into the
+        * top bits.
+        */
+        lsl     data1, data1, pos
+        lsl     data2, data2, pos
+        /*
+        * But we need to zero-extend (char is unsigned) the value and then
+        * perform a signed 32-bit subtraction.
+        * NOTE(review): the sub below is written on the 64-bit x register
+        * aliases (result, data1, data2) - confirm the "32-bit" wording.
+        */
+        lsr     data1, data1, #56       /* Keep only the top 8 bits, zero-extended.  */
+        sub     result, data1, data2, lsr #56
+        ret
+ENDPROC(strcmp)

diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S new file mode 100644 index 000000000000..42f828b06c59 --- /dev/null +++ b/arch/arm64/lib/strcmp.S
@@ -0,0 +1,234 @@
	1	/*
	2	* Copyright (C) 2013 ARM Ltd.
	3	* Copyright (C) 2013 Linaro.
	4	*
	5	* This code is based on glibc cortex strings work originally authored by Linaro
	6	* and re-licensed under GPLv2 for the Linux kernel. The original code can
	7	* be found @
	8	*
	9	* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
	10	* files/head:/src/aarch64/
	11	*
	12	* This program is free software; you can redistribute it and/or modify
	13	* it under the terms of the GNU General Public License version 2 as
	14	* published by the Free Software Foundation.
	15	*
	16	* This program is distributed in the hope that it will be useful,
	17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	* GNU General Public License for more details.
	20	*
	21	* You should have received a copy of the GNU General Public License
	22	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	23	*/
	24
	25	#include <linux/linkage.h>
	26	#include <asm/assembler.h>
	27
	28	/*
	29	* compare two strings
	30	*
	31	* Parameters:
	32	* x0 - const string 1 pointer
	33	* x1 - const string 2 pointer
	34	* Returns:
	35	* x0 - an integer less than, equal to, or greater than zero
	36	* if s1 is found, respectively, to be less than, to match,
	37	* or be greater than s2.
	38	*/
	39
	40	#define REP8_01 0x0101010101010101
	41	#define REP8_7f 0x7f7f7f7f7f7f7f7f
	42	#define REP8_80 0x8080808080808080
	43
	44	/* Parameters and result. */
	45	src1 .req x0
	46	src2 .req x1
	47	result .req x0
	48
	49	/* Internal variables. */
	50	data1 .req x2
	51	data1w .req w2
	52	data2 .req x3
	53	data2w .req w3
	54	has_nul .req x4
	55	diff .req x5
	56	syndrome .req x6
	57	tmp1 .req x7
	58	tmp2 .req x8
	59	tmp3 .req x9
	60	zeroones .req x10
	61	pos .req x11
	62
	63	ENTRY(strcmp)
	64	eor tmp1, src1, src2
	65	mov zeroones, #REP8_01
	66	tst tmp1, #7
	67	b.ne .Lmisaligned8
	68	ands tmp1, src1, #7
	69	b.ne .Lmutual_align
	70
	71	/*
	72	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	73	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	74	* can be done in parallel across the entire word.
	75	*/
	76	.Lloop_aligned:
	77	ldr data1, [src1], #8
	78	ldr data2, [src2], #8
	79	.Lstart_realigned:
	80	sub tmp1, data1, zeroones
	81	orr tmp2, data1, #REP8_7f
	82	eor diff, data1, data2 /* Non-zero if differences found. */
	83	bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
	84	orr syndrome, diff, has_nul
	85	cbz syndrome, .Lloop_aligned
	86	b .Lcal_cmpresult
	87
	88	.Lmutual_align:
	89	/*
	90	* Sources are mutually aligned, but are not currently at an
	91	* alignment boundary. Round down the addresses and then mask off
	92	* the bytes that precede the start point.
	93	*/
	94	bic src1, src1, #7
	95	bic src2, src2, #7
	96	lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
	97	ldr data1, [src1], #8
	98	neg tmp1, tmp1 /* Bits to alignment -64. */
	99	ldr data2, [src2], #8
	100	mov tmp2, #~0
	101	/* Big-endian. Early bytes are at MSB. */
	102	CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
	103	/* Little-endian. Early bytes are at LSB. */
	104	CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
	105
	106	orr data1, data1, tmp2
	107	orr data2, data2, tmp2
	108	b .Lstart_realigned
	109
	110	.Lmisaligned8:
	111	/*
	112	* Get the align offset length to compare per byte first.
	113	* After this process, one string's address will be aligned.
	114	*/
	115	and tmp1, src1, #7
	116	neg tmp1, tmp1
	117	add tmp1, tmp1, #8
	118	and tmp2, src2, #7
	119	neg tmp2, tmp2
	120	add tmp2, tmp2, #8
	121	subs tmp3, tmp1, tmp2
	122	csel pos, tmp1, tmp2, hi /*Choose the maximum. */
	123	.Ltinycmp:
	124	ldrb data1w, [src1], #1
	125	ldrb data2w, [src2], #1
	126	subs pos, pos, #1
	127	ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
	128	ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
	129	b.eq .Ltinycmp
	130	cbnz pos, 1f /*find the null or unequal...*/
	131	cmp data1w, #1
	132	ccmp data1w, data2w, #0, cs
	133	b.eq .Lstart_align /*the last bytes are equal....*/
	134	1:
	135	sub result, data1, data2
	136	ret
	137
	138	.Lstart_align:
	139	ands xzr, src1, #7
	140	b.eq .Lrecal_offset
	141	/*process more leading bytes to make str1 aligned...*/
	142	add src1, src1, tmp3
	143	add src2, src2, tmp3
	144	/*load 8 bytes from aligned str1 and non-aligned str2..*/
	145	ldr data1, [src1], #8
	146	ldr data2, [src2], #8
	147
	148	sub tmp1, data1, zeroones
	149	orr tmp2, data1, #REP8_7f
	150	bic has_nul, tmp1, tmp2
	151	eor diff, data1, data2 /* Non-zero if differences found. */
	152	orr syndrome, diff, has_nul
	153	cbnz syndrome, .Lcal_cmpresult
	154	/*How far is the current str2 from the alignment boundary...*/
	155	and tmp3, tmp3, #7
	156	.Lrecal_offset:
	157	neg pos, tmp3
	158	.Lloopcmp_proc:
	159	/*
	160	* Divide the eight bytes into two parts. First,backwards the src2
	161	* to an alignment boundary,load eight bytes from the SRC2 alignment
	162	* boundary,then compare with the relative bytes from SRC1.
	163	* If all 8 bytes are equal,then start the second part's comparison.
	164	* Otherwise finish the comparison.
	165	* This special handling is intended to guarantee that all accesses
	166	* stay within the thread/task space and avoid out-of-range access.
	167	*/
	168	ldr data1, [src1,pos]
	169	ldr data2, [src2,pos]
	170	sub tmp1, data1, zeroones
	171	orr tmp2, data1, #REP8_7f
	172	bic has_nul, tmp1, tmp2
	173	eor diff, data1, data2 /* Non-zero if differences found. */
	174	orr syndrome, diff, has_nul
	175	cbnz syndrome, .Lcal_cmpresult
	176
	177	/*The second part process*/
	178	ldr data1, [src1], #8
	179	ldr data2, [src2], #8
	180	sub tmp1, data1, zeroones
	181	orr tmp2, data1, #REP8_7f
	182	bic has_nul, tmp1, tmp2
	183	eor diff, data1, data2 /* Non-zero if differences found. */
	184	orr syndrome, diff, has_nul
	185	cbz syndrome, .Lloopcmp_proc
	186
	187	.Lcal_cmpresult:
	188	/*
	189	* reversed the byte-order as big-endian,then CLZ can find the most
	190	* significant zero bits.
	191	*/
	192	CPU_LE( rev syndrome, syndrome )
	193	CPU_LE( rev data1, data1 )
	194	CPU_LE( rev data2, data2 )
	195
	196	/*
	197	* For big-endian we cannot use the trick with the syndrome value
	198	* as carry-propagation can corrupt the upper bits if the trailing
	199	* bytes in the string contain 0x01.
	200	* However, if there is no NUL byte in the dword, we can generate
	201	* the result directly. We cannot just subtract the bytes as the
	202	* MSB might be significant.
	203	*/
	204	CPU_BE( cbnz has_nul, 1f )
	205	CPU_BE( cmp data1, data2 )
	206	CPU_BE( cset result, ne )
	207	CPU_BE( cneg result, result, lo )
	208	CPU_BE( ret )
	209	CPU_BE( 1: )
	210	/*Re-compute the NUL-byte detection, using a byte-reversed value. */
	211	CPU_BE( rev tmp3, data1 )
	212	CPU_BE( sub tmp1, tmp3, zeroones )
	213	CPU_BE( orr tmp2, tmp3, #REP8_7f )
	214	CPU_BE( bic has_nul, tmp1, tmp2 )
	215	CPU_BE( rev has_nul, has_nul )
	216	CPU_BE( orr syndrome, diff, has_nul )
	217
	218	clz pos, syndrome
	219	/*
	220	* The MS-non-zero bit of the syndrome marks either the first bit
	221	* that is different, or the top bit of the first zero byte.
	222	* Shifting left now will bring the critical information into the
	223	* top bits.
	224	*/
	225	lsl data1, data1, pos
	226	lsl data2, data2, pos
	227	/*
	228	* But we need to zero-extend (char is unsigned) the value and then
	229	* perform a signed 32-bit subtraction.
	230	*/
	231	lsr data1, data1, #56
	232	sub result, data1, data2, lsr #56
	233	ret
	234	ENDPROC(strcmp)