Linux-2.6.12-rc2 (tag: v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc64/kernel/vector.S
1 files changed, 172 insertions, 0 deletions
diff --git a/arch/ppc64/kernel/vector.S b/arch/ppc64/kernel/vector.S
new file mode 100644
index 000000000000..b79d33e4001e
--- /dev/null
+++ b/arch/ppc64/kernel/vector.S
@@ -0,0 +1,172 @@
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+/*
+ * The routines below are in assembler so we can closely control the
+ * usage of floating-point registers.  These routines must be called
+ * with preempt disabled.
+ */
+        .section ".toc","aw"            /* constants the code below loads with lfd, addressed off r2 */
+fpzero:
+        .tc     FD_0_0[TC],0                            /* 0.0, fpenable writes it to FPSCR */
+fpone:
+        .tc     FD_3ff00000_0[TC],0x3ff0000000000000    /* 1.0 */
+fphalf:
+        .tc     FD_3fe00000_0[TC],0x3fe0000000000000    /* 0.5 */
+        .text
+/*
+ * Internal routine to enable floating point and set FPSCR to 0.
+ * Don't call it from C; it doesn't use the normal calling convention.
+ *
+ * State left behind for fpdisable, which undoes everything done here:
+ *   r10       MSR value read on entry
+ *   fr31      FPSCR value read on entry
+ *   -8(r1)    fr31 as it was on entry
+ *   -16(r1)   fr0 as it was on entry
+ *   -24(r1)   fr1 as it was on entry
+ * r11 is used as scratch and fr1 is left holding the fpzero constant.
+ * r1 is not adjusted, the save slots are at negative offsets from it.
+ */
+fpenable:
+        mfmsr   r10                     /* r10 = MSR on entry, kept for fpdisable */
+        ori     r11,r10,MSR_FP          /* r11 = entry MSR with MSR_FP set */
+        mtmsr   r11
+        isync
+        stfd    fr31,-8(r1)             /* save the FP registers that get overwritten */
+        stfd    fr0,-16(r1)
+        stfd    fr1,-24(r1)
+        mffs    fr31                    /* fr31 = FPSCR on entry, kept for fpdisable */
+        lfd     fr1,fpzero@toc(r2)      /* fr1 = 0 */
+        mtfsf   0xff,fr1                /* FPSCR = 0 */
+        blr
+fpdisable:                              /* exit path, the vector routines branch here with b */
+        mtlr    r12                     /* LR = return address the entry point saved with mflr r12 */
+        mtfsf   0xff,fr31               /* write back the FPSCR value fpenable read into fr31 */
+        lfd     fr1,-24(r1)             /* reload fr1, fr0 and fr31 from the slots fpenable filled */
+        lfd     fr0,-16(r1)
+        lfd     fr31,-8(r1)
+        mtmsr   r10                     /* write back the MSR value fpenable read into r10 */
+        isync
+        blr                             /* return through the LR restored above */
+/*
+ * Vector add, floating point.
+ * For each of 4 single-precision elements: r3[i] = r4[i] + r5[i],
+ * where r3, r4 and r5 are the base addresses used by the loop.
+ * r0, r6, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ * fr0 and fr1 are saved by fpenable and reloaded by fpdisable.
+ */
+_GLOBAL(vaddfp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        li      r0,4
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r6,0                    /* r6 = byte offset of the current element */
+1:      lfsx    fr0,r4,r6
+        lfsx    fr1,r5,r6
+        fadds   fr0,fr0,fr1
+        stfsx   fr0,r3,r6
+        addi    r6,r6,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
+/*
+ * Vector subtract, floating point.
+ * For each of 4 single-precision elements: r3[i] = r4[i] - r5[i],
+ * where r3, r4 and r5 are the base addresses used by the loop.
+ * r0, r6, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ * fr0 and fr1 are saved by fpenable and reloaded by fpdisable.
+ */
+_GLOBAL(vsubfp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        li      r0,4
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r6,0                    /* r6 = byte offset of the current element */
+1:      lfsx    fr0,r4,r6
+        lfsx    fr1,r5,r6
+        fsubs   fr0,fr0,fr1             /* element from r4 minus element from r5 */
+        stfsx   fr0,r3,r6
+        addi    r6,r6,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
+/*
+ * Vector multiply and add, floating point.
+ * For each of 4 single-precision elements:
+ *   r3[i] = r4[i] * r6[i] + r5[i]
+ * where r3 to r6 are the base addresses used by the loop.
+ * r0, r7, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ * fr2 is saved at -32(r1) in addition to what fpenable saves.
+ */
+_GLOBAL(vmaddfp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        stfd    fr2,-32(r1)             /* extra FP register, slot just below the fpenable slots */
+        li      r0,4
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r7,0                    /* r7 = byte offset of the current element */
+1:      lfsx    fr0,r4,r7
+        lfsx    fr1,r5,r7
+        lfsx    fr2,r6,r7
+        fmadds  fr0,fr0,fr2,fr1         /* fr0 = fr0 * fr2 + fr1 */
+        stfsx   fr0,r3,r7
+        addi    r7,r7,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        lfd     fr2,-32(r1)             /* reload fr2 before the common exit */
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
+/*
+ * Vector negative multiply and subtract, floating point.
+ * For each of 4 single-precision elements:
+ *   r3[i] = -(r4[i] * r6[i] - r5[i])
+ * where r3 to r6 are the base addresses used by the loop.
+ * r0, r7, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ * fr2 is saved at -32(r1) in addition to what fpenable saves.
+ */
+_GLOBAL(vnmsubfp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        stfd    fr2,-32(r1)             /* extra FP register, slot just below the fpenable slots */
+        li      r0,4
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r7,0                    /* r7 = byte offset of the current element */
+1:      lfsx    fr0,r4,r7
+        lfsx    fr1,r5,r7
+        lfsx    fr2,r6,r7
+        fnmsubs fr0,fr0,fr2,fr1         /* fr0 = -(fr0 * fr2 - fr1) */
+        stfsx   fr0,r3,r7
+        addi    r7,r7,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        lfd     fr2,-32(r1)             /* reload fr2 before the common exit */
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
+/*
+ * Vector reciprocal estimate.  We just compute 1.0/x.
+ * r3 -> destination, r4 -> source.
+ * Processes 4 single-precision elements: r3[i] = 1.0 / r4[i].
+ * r0, r6, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ * fr0 and fr1 are saved by fpenable and reloaded by fpdisable.
+ */
+_GLOBAL(vrefp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        li      r0,4
+        lfd     fr1,fpone@toc(r2)       /* fr1 = 1.0, the dividend for every element */
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r6,0                    /* r6 = byte offset of the current element */
+1:      lfsx    fr0,r4,r6
+        fdivs   fr0,fr1,fr0             /* fr0 = 1.0 / element */
+        stfsx   fr0,r3,r6
+        addi    r6,r6,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
+/*
+ * Vector reciprocal square-root estimate, floating point.
+ * We use the frsqrte instruction for the initial estimate followed
+ * by 2 iterations of Newton-Raphson to get sufficient accuracy.
+ * r3 -> destination, r4 -> source.
+ *
+ * Processes 4 single-precision elements.  Inside the loop fr0 is the
+ * source element s, fr1 the running estimate r, fr2 and fr3 are
+ * temporaries, fr4 holds 1.0 and fr5 holds 0.5.
+ * fr2 to fr5 are saved at -32 to -56(r1) and reloaded before the exit,
+ * in addition to what fpenable saves.
+ * r0, r6, r12 and CTR are overwritten here, r10 and r11 by fpenable.
+ */
+_GLOBAL(vrsqrtefp)
+        mflr    r12                     /* keep the return address, bl overwrites LR */
+        bl      fpenable
+        stfd    fr2,-32(r1)             /* extra FP registers, slots below the fpenable slots */
+        stfd    fr3,-40(r1)
+        stfd    fr4,-48(r1)
+        stfd    fr5,-56(r1)
+        li      r0,4
+        lfd     fr4,fpone@toc(r2)       /* fr4 = 1.0 */
+        lfd     fr5,fphalf@toc(r2)      /* fr5 = 0.5 */
+        mtctr   r0                      /* CTR = 4 loop iterations */
+        li      r6,0                    /* r6 = byte offset of the current element */
+1:      lfsx    fr0,r4,r6
+        frsqrte fr1,fr0         /* r = frsqrte(s) */
+        fmuls   fr3,fr1,fr0     /* r * s */
+        fmuls   fr2,fr1,fr5     /* r * 0.5 */
+        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
+        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
+        fmuls   fr3,fr1,fr0     /* r * s */
+        fmuls   fr2,fr1,fr5     /* r * 0.5 */
+        fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
+        fmadds  fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
+        stfsx   fr1,r3,r6
+        addi    r6,r6,4                 /* step to the next 4-byte element */
+        bdnz    1b
+        lfd     fr5,-56(r1)             /* reload fr5 to fr2 before the common exit */
+        lfd     fr4,-48(r1)
+        lfd     fr3,-40(r1)
+        lfd     fr2,-32(r1)
+        b       fpdisable               /* fpdisable restores state and returns via r12 */
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc64/kernel/vector.S

diff --git a/arch/ppc64/kernel/vector.S b/arch/ppc64/kernel/vector.S new file mode 100644 index 000000000000..b79d33e4001e --- /dev/null +++ b/arch/ppc64/kernel/vector.S
@@ -0,0 +1,172 @@
	1	#include <asm/ppc_asm.h>
	2	#include <asm/processor.h>
	3
	4	/*
	5	* The routines below are in assembler so we can closely control the
	6	* usage of floating-point registers. These routines must be called
	7	* with preempt disabled.
	8	*/
	9	.section ".toc","aw" /* constants the code below loads with lfd, addressed off r2 */
	10	fpzero:
	11	.tc FD_0_0[TC],0 /* 0.0, fpenable writes it to FPSCR */
	12	fpone:
	13	.tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
	14	fphalf:
	15	.tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
	16
	17	.text
	18	/*
	19	* Internal routine to enable floating point and set FPSCR to 0.
	20	* Don't call it from C; it doesn't use the normal calling convention.
	21	*/
	22	fpenable: /* leaves r10 = entry MSR and fr31 = entry FPSCR for fpdisable */
	23	mfmsr r10 /* r10 = MSR on entry, kept for fpdisable */
	24	ori r11,r10,MSR_FP /* r11 = entry MSR with MSR_FP set */
	25	mtmsr r11
	26	isync
	27	stfd fr31,-8(r1) /* save the FP registers that get overwritten */
	28	stfd fr0,-16(r1)
	29	stfd fr1,-24(r1)
	30	mffs fr31 /* fr31 = FPSCR on entry, kept for fpdisable */
	31	lfd fr1,fpzero@toc(r2) /* fr1 = 0 */
	32	mtfsf 0xff,fr1 /* FPSCR = 0 */
	33	blr
	34
	35	fpdisable: /* exit path, the vector routines branch here with b */
	36	mtlr r12 /* LR = return address the entry point saved with mflr r12 */
	37	mtfsf 0xff,fr31 /* write back the FPSCR value fpenable read into fr31 */
	38	lfd fr1,-24(r1) /* reload fr1, fr0 and fr31 from the slots fpenable filled */
	39	lfd fr0,-16(r1)
	40	lfd fr31,-8(r1)
	41	mtmsr r10 /* write back the MSR value fpenable read into r10 */
	42	isync
	43	blr /* return through the LR restored above */
	44
	45	/*
	46	* Vector add, floating point.
	47	*/
	48	_GLOBAL(vaddfp) /* r3[i] = r4[i] + r5[i] for 4 single-precision elements */
	49	mflr r12 /* keep the return address, bl overwrites LR */
	50	bl fpenable
	51	li r0,4
	52	mtctr r0 /* CTR = 4 loop iterations */
	53	li r6,0 /* r6 = byte offset of the current element */
	54	1: lfsx fr0,r4,r6
	55	lfsx fr1,r5,r6
	56	fadds fr0,fr0,fr1
	57	stfsx fr0,r3,r6
	58	addi r6,r6,4 /* step to the next 4-byte element */
	59	bdnz 1b
	60	b fpdisable /* fpdisable restores state and returns via r12 */
	61
	62	/*
	63	* Vector subtract, floating point.
	64	*/
	65	_GLOBAL(vsubfp) /* r3[i] = r4[i] - r5[i] for 4 single-precision elements */
	66	mflr r12 /* keep the return address, bl overwrites LR */
	67	bl fpenable
	68	li r0,4
	69	mtctr r0 /* CTR = 4 loop iterations */
	70	li r6,0 /* r6 = byte offset of the current element */
	71	1: lfsx fr0,r4,r6
	72	lfsx fr1,r5,r6
	73	fsubs fr0,fr0,fr1 /* element from r4 minus element from r5 */
	74	stfsx fr0,r3,r6
	75	addi r6,r6,4 /* step to the next 4-byte element */
	76	bdnz 1b
	77	b fpdisable /* fpdisable restores state and returns via r12 */
	78
	79	/*
	80	* Vector multiply and add, floating point.
	81	*/
	82	_GLOBAL(vmaddfp) /* r3[i] = r4[i] * r6[i] + r5[i] for 4 single-precision elements */
	83	mflr r12 /* keep the return address, bl overwrites LR */
	84	bl fpenable
	85	stfd fr2,-32(r1) /* extra FP register, slot just below the fpenable slots */
	86	li r0,4
	87	mtctr r0 /* CTR = 4 loop iterations */
	88	li r7,0 /* r7 = byte offset of the current element */
	89	1: lfsx fr0,r4,r7
	90	lfsx fr1,r5,r7
	91	lfsx fr2,r6,r7
	92	fmadds fr0,fr0,fr2,fr1 /* fr0 = fr0 * fr2 + fr1 */
	93	stfsx fr0,r3,r7
	94	addi r7,r7,4 /* step to the next 4-byte element */
	95	bdnz 1b
	96	lfd fr2,-32(r1) /* reload fr2 before the common exit */
	97	b fpdisable /* fpdisable restores state and returns via r12 */
	98
	99	/*
	100	* Vector negative multiply and subtract, floating point.
	101	*/
	102	_GLOBAL(vnmsubfp) /* r3[i] = -(r4[i] * r6[i] - r5[i]) for 4 single-precision elements */
	103	mflr r12 /* keep the return address, bl overwrites LR */
	104	bl fpenable
	105	stfd fr2,-32(r1) /* extra FP register, slot just below the fpenable slots */
	106	li r0,4
	107	mtctr r0 /* CTR = 4 loop iterations */
	108	li r7,0 /* r7 = byte offset of the current element */
	109	1: lfsx fr0,r4,r7
	110	lfsx fr1,r5,r7
	111	lfsx fr2,r6,r7
	112	fnmsubs fr0,fr0,fr2,fr1 /* fr0 = -(fr0 * fr2 - fr1) */
	113	stfsx fr0,r3,r7
	114	addi r7,r7,4 /* step to the next 4-byte element */
	115	bdnz 1b
	116	lfd fr2,-32(r1) /* reload fr2 before the common exit */
	117	b fpdisable /* fpdisable restores state and returns via r12 */
	118
	119	/*
	120	* Vector reciprocal estimate. We just compute 1.0/x.
	121	* r3 -> destination, r4 -> source.
	122	*/
	123	_GLOBAL(vrefp) /* r3[i] = 1.0 / r4[i] for 4 single-precision elements */
	124	mflr r12 /* keep the return address, bl overwrites LR */
	125	bl fpenable
	126	li r0,4
	127	lfd fr1,fpone@toc(r2) /* fr1 = 1.0, the dividend for every element */
	128	mtctr r0 /* CTR = 4 loop iterations */
	129	li r6,0 /* r6 = byte offset of the current element */
	130	1: lfsx fr0,r4,r6
	131	fdivs fr0,fr1,fr0 /* fr0 = 1.0 / element */
	132	stfsx fr0,r3,r6
	133	addi r6,r6,4 /* step to the next 4-byte element */
	134	bdnz 1b
	135	b fpdisable /* fpdisable restores state and returns via r12 */
	136
	137	/*
	138	* Vector reciprocal square-root estimate, floating point.
	139	* We use the frsqrte instruction for the initial estimate followed
	140	* by 2 iterations of Newton-Raphson to get sufficient accuracy.
	141	* r3 -> destination, r4 -> source.
	142	*/
	143	_GLOBAL(vrsqrtefp) /* 4 single-precision elements, fr0 = s, fr1 = r, fr2 and fr3 temporaries */
	144	mflr r12 /* keep the return address, bl overwrites LR */
	145	bl fpenable
	146	stfd fr2,-32(r1) /* extra FP registers, slots below the fpenable slots */
	147	stfd fr3,-40(r1)
	148	stfd fr4,-48(r1)
	149	stfd fr5,-56(r1)
	150	li r0,4
	151	lfd fr4,fpone@toc(r2) /* fr4 = 1.0 */
	152	lfd fr5,fphalf@toc(r2) /* fr5 = 0.5 */
	153	mtctr r0 /* CTR = 4 loop iterations */
	154	li r6,0 /* r6 = byte offset of the current element */
	155	1: lfsx fr0,r4,r6
	156	frsqrte fr1,fr0 /* r = frsqrte(s) */
	157	fmuls fr3,fr1,fr0 /* r * s */
	158	fmuls fr2,fr1,fr5 /* r * 0.5 */
	159	fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
	160	fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
	161	fmuls fr3,fr1,fr0 /* r * s */
	162	fmuls fr2,fr1,fr5 /* r * 0.5 */
	163	fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
	164	fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
	165	stfsx fr1,r3,r6
	166	addi r6,r6,4 /* step to the next 4-byte element */
	167	bdnz 1b
	168	lfd fr5,-56(r1) /* reload fr5 to fr2 before the common exit */
	169	lfd fr4,-48(r1)
	170	lfd fr3,-40(r1)
	171	lfd fr2,-32(r1)
	172	b fpdisable /* fpdisable restores state and returns via r12 */