1 files changed, 488 insertions, 0 deletions
diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c
new file mode 100644
index 000000000000..7c1406ff825e
--- /dev/null
+++ b/arch/parisc/lib/io.c
@@ -0,0 +1,488 @@
+/*
+ * arch/parisc/lib/io.c
+ *
+ * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
+ * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
+ *
+ * IO accessing functions which shouldn't be inlined because they're too big
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/io.h>
+/* Copies a block of memory to a device in an efficient manner.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ *
+ * @dst:   destination in I/O space
+ * @src:   source buffer in ordinary memory
+ * @count: number of bytes to copy; nothing is written when count <= 0
+ *
+ * When dst and src share the same offset within a 32-bit word, leading
+ * bytes are written singly until dst is word aligned, the bulk goes out
+ * through __raw_writel(), and whatever is left is written bytewise.
+ * Otherwise the whole block is written one byte at a time.
+ */
+void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+        if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
+                goto bytecopy;
+        /* Stop aligning as soon as count is used up: a request shorter
+         * than the alignment gap must not drive count negative, or the
+         * tail loop below would run far past the end of the block.
+         */
+        while (count > 0 && ((unsigned long)dst & 3)) {
+                writeb(*(const char *)src, dst++);
+                src++;
+                count--;
+        }
+        /* dst and src are now both word aligned */
+        while (count > 3) {
+                __raw_writel(*(const u32 *)src, dst);
+                src += 4;
+                dst += 4;
+                count -= 4;
+        }
+ bytecopy:
+        while (count > 0) {
+                writeb(*(const char *)src, dst++);
+                src++;
+                count--;
+        }
+}
+/*
+** Copies a block of memory from a device in an efficient manner.
+** Assumes the device can cope with 32-bit transfers.  If it can't,
+** don't use this function.
+**
+** dst:   destination buffer in ordinary memory
+** src:   source in I/O space
+** count: number of bytes to copy; nothing is read when count <= 0
+**
+** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
+**      27341/64    = 427 cyc per int
+**      61311/128   = 478 cyc per short
+**      122637/256  = 479 cyc per byte
+** Ergo bus latencies dominant (not transfer size).
+**      Minimize total number of transfers at cost of CPU cycles.
+**      TODO: only look at src alignment and adjust the stores to dest.
+*/
+void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+        /* first compare alignment of src/dst */
+        if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
+                goto bytecopy;
+        /* src and dst have the same parity.  Consume a leading odd byte
+         * before any 16-bit access is attempted, so that the shortcopy
+         * path below never issues __raw_readw() on an odd address.
+         */
+        if ((unsigned long)src & 1) {
+                *(u8 *)dst = readb(src);
+                src++;
+                dst++;
+                count--;
+        }
+        /* Advancing both pointers together leaves their XOR unchanged */
+        if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
+                goto shortcopy;
+        /* Then bring src (and with it dst) up to a 32-bit boundary */
+        if ((unsigned long)src & 2) {
+                *(u16 *)dst = __raw_readw(src);
+                src += 2;
+                dst += 2;
+                count -= 2;
+        }
+        while (count > 3) {
+                *(u32 *)dst = __raw_readl(src);
+                dst += 4;
+                src += 4;
+                count -= 4;
+        }
+ shortcopy:
+        while (count > 1) {
+                *(u16 *)dst = __raw_readw(src);
+                src += 2;
+                dst += 2;
+                count -= 2;
+        }
+ bytecopy:
+        while (count > 0) {
+                *(char *)dst = readb(src);
+                src++;
+                dst++;
+                count--;
+        }
+}
+/* Sets a block of memory on a device to a given value.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ *
+ * @addr:  start of the region in I/O space
+ * @val:   byte value to store
+ * @count: number of bytes to set; nothing is written when count <= 0
+ *
+ * Leading bytes are written singly until addr is 32-bit aligned, the
+ * bulk is written with __raw_writel(), and the remainder bytewise.
+ */
+void memset_io(volatile void __iomem *addr, unsigned char val, int count)
+{
+        /* Widen before shifting: val promotes to signed int, and shifting
+         * a value >= 0x80 left by 24 would overflow it.
+         */
+        u32 val32 = ((u32)val << 24) | ((u32)val << 16) | ((u32)val << 8) | val;
+        /* Never align past the end of the request, otherwise count goes
+         * negative and the tail loop would not stop where it should.
+         */
+        while (count > 0 && ((unsigned long)addr & 3)) {
+                writeb(val, addr++);
+                count--;
+        }
+        while (count > 3) {
+                __raw_writel(val32, addr);
+                addr += 4;
+                count -= 4;
+        }
+        while (count > 0) {
+                writeb(val, addr++);
+                count--;
+        }
+}
+/*
+ * Read COUNT 8-bit bytes from port PORT into memory starting at
+ * DST.
+ *
+ * Bytes are stored singly until the destination is 32-bit aligned.
+ * After that four port reads are packed into one word store, with the
+ * first byte read placed in the most significant byte of the word.
+ * Any remaining bytes are stored singly.
+ */
+void insb (unsigned long port, void *dst, unsigned long count)
+{
+        unsigned char *p;
+        p = (unsigned char *)dst;
+        while (((unsigned long)p) & 0x3) {
+                if (!count)
+                        return;
+                count--;
+                *p = inb(port);
+                p++;
+        }
+        while (count >= 4) {
+                unsigned int w;
+                count -= 4;
+                /* Widen each byte before shifting so the shift is done
+                 * in unsigned arithmetic rather than on a promoted int.
+                 */
+                w = (unsigned int)inb(port) << 24;
+                w |= (unsigned int)inb(port) << 16;
+                w |= (unsigned int)inb(port) << 8;
+                w |= inb(port);
+                *(unsigned int *) p = w;
+                p += 4;
+        }
+        while (count) {
+                --count;
+                *p = inb(port);
+                p++;
+        }
+}
+/*
+ * Read COUNT 16-bit words from port PORT into memory starting at
+ * DST.  DST may have any alignment: 32-bit and 16-bit aligned buffers
+ * are filled with word stores where possible, odd addresses fall back
+ * to byte and halfword stores.  This is used by the IDE driver to read
+ * disk sectors.  Performance is important, but the interfaces seems to
+ * be slow: just using the inlined version of the inw() breaks things.
+ */
+void insw (unsigned long port, void *dst, unsigned long count)
+{
+        unsigned int l = 0, l2;
+        unsigned char *p;
+        p = (unsigned char *)dst;
+
+        if (!count)
+                return;
+
+        switch (((unsigned long)p) & 0x3)
+        {
+         case 0x00:                     /* Buffer 32-bit aligned */
+                while (count>=2) {
+                        count -= 2;
+                        /* widen first: shifting a promoted 16-bit value
+                         * by 16 must not be done in signed int */
+                        l = (unsigned int)cpu_to_le16(inw(port)) << 16;
+                        l |= cpu_to_le16(inw(port));
+                        *(unsigned int *)p = l;
+                        p += 4;
+                }
+                if (count) {
+                        *(unsigned short *)p = cpu_to_le16(inw(port));
+                }
+                break;
+
+         case 0x02:                     /* Buffer 16-bit aligned */
+                /* one halfword store brings p up to a 32-bit boundary */
+                *(unsigned short *)p = cpu_to_le16(inw(port));
+                p += 2;
+                count--;
+                while (count>=2) {
+                        count -= 2;
+                        l = (unsigned int)cpu_to_le16(inw(port)) << 16;
+                        l |= cpu_to_le16(inw(port));
+                        *(unsigned int *)p = l;
+                        p += 4;
+                }
+                if (count) {
+                        *(unsigned short *)p = cpu_to_le16(inw(port));
+                }
+                break;
+
+         case 0x01:                     /* Buffer 8-bit aligned */
+         case 0x03:
+                /* I don't bother with 32bit transfers
+                 * in this case, 16bit will have to do -- DE */
+                --count;
+
+                /* store the high byte now, carry the low byte in l */
+                l = cpu_to_le16(inw(port));
+                *p = l >> 8;
+                p++;
+                while (count--)
+                {
+                        l2 = cpu_to_le16(inw(port));
+                        *(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
+                        p += 2;
+                        l = l2;
+                }
+                /* low byte of the last word read */
+                *p = l & 0xff;
+                break;
+        }
+}
+/*
+ * Read COUNT 32-bit words from port PORT into memory starting at
+ * DST. Works with any alignment of DST: each case below splits the
+ * words read from the port so that every store lands on an address
+ * aligned for its size. Performance is important,
+ * but the interfaces seems to be slow: just using the inlined version
+ * of the inl() breaks things.
+ */
+void insl (unsigned long port, void *dst, unsigned long count)
+{
+        unsigned int l = 0, l2;
+        unsigned char *p;
+        p = (unsigned char *)dst;
+        
+        if (!count)
+                return;
+        
+        switch (((unsigned long) dst) & 0x3)
+        {
+         case 0x00:                     /* Buffer 32-bit aligned: one word store per port read */
+                while (count--)
+                {
+                        *(unsigned int *)p = cpu_to_le32(inl(port));
+                        p += 4;
+                }
+                break;
+        
+         case 0x02:                     /* Buffer 16-bit aligned: halfword, words, halfword */
+                --count;
+                
+                l = cpu_to_le32(inl(port));
+                *(unsigned short *)p = l >> 16; /* upper half of the first word */
+                p += 2;
+                
+                while (count--)
+                {
+                        l2 = cpu_to_le32(inl(port));
+                        *(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16); /* lower half of previous word + upper half of new one */
+                        p += 4;
+                        l = l2;
+                }
+                *(unsigned short *)p = l & 0xffff; /* lower half of the last word */
+                break;
+         case 0x01:                     /* Buffer at offset 1: byte, halfword, words, byte */
+                --count;
+                
+                l = cpu_to_le32(inl(port));
+                *(unsigned char *)p = l >> 24; /* top byte of the first word */
+                p++;
+                *(unsigned short *)p = (l >> 8) & 0xffff; /* its middle two bytes */
+                p += 2;
+                while (count--)
+                {
+                        l2 = cpu_to_le32(inl(port));
+                        *(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8); /* low byte of previous word + top three bytes of new one */
+                        p += 4;
+                        l = l2;
+                }
+                *p = l & 0xff; /* low byte of the last word */
+                break;
+         case 0x03:                     /* Buffer at offset 3: byte, words, halfword, byte */
+                --count;
+                
+                l = cpu_to_le32(inl(port));
+                *p = l >> 24; /* top byte of the first word */
+                p++;
+                while (count--)
+                {
+                        l2 = cpu_to_le32(inl(port));
+                        *(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24; /* low three bytes of previous word + top byte of new one */
+                        p += 4;
+                        l = l2;
+                }
+                *(unsigned short *)p = (l >> 8) & 0xffff; /* middle two bytes of the last word */
+                p += 2;
+                *p = l & 0xff; /* and its low byte */
+                break;
+        }
+}
+/*
+ * Counterpart of insb: write COUNT bytes from SRC to port PORT,
+ * one outb() per byte, in buffer order.
+ * No attempt is made to fetch the buffer in aligned words here:
+ * doing byte reads the "slow" way isn't nearly as slow as
+ * doing byte writes the slow way (no r-m-w cycle).
+ */
+void outsb(unsigned long port, const void * src, unsigned long count)
+{
+        const unsigned char *bytes = src;
+        unsigned long i;
+        for (i = 0; i < count; i++)
+                outb(bytes[i], port);
+}
+/*
+ * Like insw but in the opposite direction: write COUNT 16-bit words
+ * from SRC to port PORT.  SRC may have any alignment.  This is used by
+ * the IDE driver to write disk sectors.  Performance is important, but
+ * the interfaces seems to be slow: just using the inlined version of
+ * the outw() breaks things.
+ */
+void outsw (unsigned long port, const void *src, unsigned long count)
+{
+        unsigned int l = 0, l2;
+        const unsigned char *p;
+        p = (const unsigned char *)src;
+
+        if (!count)
+                return;
+
+        switch (((unsigned long)p) & 0x3)
+        {
+         case 0x00:                     /* Buffer 32-bit aligned */
+                while (count>=2) {
+                        count -= 2;
+                        l = *(const unsigned int *)p;
+                        p += 4;
+                        outw(le16_to_cpu(l >> 16), port);
+                        outw(le16_to_cpu(l & 0xffff), port);
+                }
+                if (count) {
+                        outw(le16_to_cpu(*(const unsigned short *)p), port);
+                }
+                break;
+
+         case 0x02:                     /* Buffer 16-bit aligned */
+                /* one halfword load brings p up to a 32-bit boundary */
+                outw(le16_to_cpu(*(const unsigned short *)p), port);
+                p += 2;
+                count--;
+
+                while (count>=2) {
+                        count -= 2;
+                        l = *(const unsigned int *)p;
+                        p += 4;
+                        outw(le16_to_cpu(l >> 16), port);
+                        outw(le16_to_cpu(l & 0xffff), port);
+                }
+                if (count) {
+                        outw(le16_to_cpu(*(const unsigned short *)p), port);
+                }
+                break;
+
+         case 0x01:                     /* Buffer 8-bit aligned */
+         case 0x03:                     /* was missing: such buffers were silently not written */
+                /* I don't bother with 32bit transfers
+                 * in this case, 16bit will have to do -- DE */
+
+                /* l carries the high byte of the word being assembled */
+                l  = *p << 8;
+                p++;
+                count--;
+                while (count)
+                {
+                        count--;
+                        l2 = *(const unsigned short *)p;
+                        p += 2;
+                        outw(le16_to_cpu(l | l2 >> 8), port);
+                        /* keep only the low byte of l2 as the next high byte */
+                        l = (l2 & 0xff) << 8;
+                }
+                /* The last byte of the buffer is the low byte of the final
+                 * word.  It is a plain byte, so it must not be shifted
+                 * right (that always produced 0 and dropped the byte).
+                 */
+                l2 = *p;
+                outw (le16_to_cpu(l | l2), port);
+                break;
+
+        }
+}
+/*
+ * Like insl but in the opposite direction: write COUNT 32-bit words
+ * from SRC to port PORT.  This is used by the IDE driver to write disk
+ * sectors.  Works with any alignment in SRC: each case below loads the
+ * buffer with accesses aligned for their size and reassembles the
+ * words before they are written out.
+ *  Performance is important, but the interfaces seems to be slow:
+ * just using the inlined version of the outl() breaks things.
+ */
+void outsl (unsigned long port, const void *src, unsigned long count)
+{
+        unsigned int l = 0, l2;
+        const unsigned char *p;
+        p = (const unsigned char *)src;
+
+        if (!count)
+                return;
+
+        switch (((unsigned long)p) & 0x3)
+        {
+         case 0x00:                     /* Buffer 32-bit aligned */
+                while (count--)
+                {
+                        outl(le32_to_cpu(*(const unsigned int *)p), port);
+                        p += 4;
+                }
+                break;
+
+         case 0x02:                     /* Buffer 16-bit aligned */
+                --count;
+
+                /* upper half of the first word */
+                l = *(const unsigned short *)p;
+                p += 2;
+
+                while (count--)
+                {
+                        l2 = *(const unsigned int *)p;
+                        p += 4;
+                        outl (le32_to_cpu(l << 16 | l2 >> 16), port);
+                        l = l2;
+                }
+                /* lower half of the last word */
+                l2 = *(const unsigned short *)p;
+                outl (le32_to_cpu(l << 16 | l2), port);
+                break;
+         case 0x01:                     /* Buffer at offset 1: byte, halfword, words, byte */
+                --count;
+                /* widen before shifting so the shift is unsigned */
+                l = (unsigned int)*p << 24;
+                p++;
+                l |= (unsigned int)*(const unsigned short *)p << 8;
+                p += 2;
+                while (count--)
+                {
+                        l2 = *(const unsigned int *)p;
+                        p += 4;
+                        outl (le32_to_cpu(l | l2 >> 24), port);
+                        l = l2 << 8;
+                }
+                l2 = *p;
+                outl (le32_to_cpu(l | l2), port);
+                break;
+         case 0x03:                     /* Buffer at offset 3: byte, words, halfword, byte */
+                --count;
+
+                l = (unsigned int)*p << 24;
+                p++;
+                while (count--)
+                {
+                        l2 = *(const unsigned int *)p;
+                        p += 4;
+                        outl (le32_to_cpu(l | l2 >> 8), port);
+                        l = l2 << 24;
+                }
+                /* l already holds the top byte, so the trailing halfword
+                 * belongs in bits 23..8 and the final byte in bits 7..0.
+                 * Shifting the halfword by 16 would overlap the top byte
+                 * and leave bits 15..8 empty.
+                 */
+                l2 = (unsigned int)*(const unsigned short *)p << 8;
+                p += 2;
+                l2 |= *p;
+                outl (le32_to_cpu(l | l2), port);
+                break;
+        }
+}
+EXPORT_SYMBOL(insb);   /* export the string port I/O helpers for use by modules */
+EXPORT_SYMBOL(insw);
+EXPORT_SYMBOL(insl);
+EXPORT_SYMBOL(outsb);
+EXPORT_SYMBOL(outsw);
+EXPORT_SYMBOL(outsl);

diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c new file mode 100644 index 000000000000..7c1406ff825e --- /dev/null +++ b/arch/parisc/lib/io.c
@@ -0,0 +1,488 @@
	1	/*
	2	* arch/parisc/lib/io.c
	3	*
	4	* Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
	5	* Copyright (c) Randolph Chung 2001 <tausq@debian.org>
	6	*
	7	* IO accessing functions which shouldn't be inlined because they're too big
	8	*/
	9
	10	#include <linux/kernel.h>
	11	#include <linux/module.h>
	12	#include <asm/io.h>
	13
	14	/* Copies a block of memory to a device in an efficient manner.
	15	* Assumes the device can cope with 32-bit transfers. If it can't,
	16	* don't use this function.
	17	*/
	18	void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
	19	{
	20	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
	21	goto bytecopy;
	22	while ((unsigned long)dst & 3) {
	23	writeb((char )src, dst++);
	24	src++;
	25	count--;
	26	}
	27	while (count > 3) {
	28	__raw_writel((u32 )src, dst);
	29	src += 4;
	30	dst += 4;
	31	count -= 4;
	32	}
	33	bytecopy:
	34	while (count--) {
	35	writeb((char )src, dst++);
	36	src++;
	37	}
	38	}
	39
	40	/*
	41	** Copies a block of memory from a device in an efficient manner.
	42	** Assumes the device can cope with 32-bit transfers. If it can't,
	43	** don't use this function.
	44	**
	45	** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
	46	** 27341/64 = 427 cyc per int
	47	** 61311/128 = 478 cyc per short
	48	** 122637/256 = 479 cyc per byte
	49	** Ergo bus latencies dominant (not transfer size).
	50	** Minimize total number of transfers at cost of CPU cycles.
	51	** TODO: only look at src alignment and adjust the stores to dest.
	52	*/
	53	void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
	54	{
	55	/* first compare alignment of src/dst */
	56	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) \|\| (count < 2) )
	57	goto bytecopy;
	58
	59	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) \|\| (count < 4) )
	60	goto shortcopy;
	61
	62	/* Then check for misaligned start address */
	63	if ((unsigned long)src & 1) {
	64	(u8 )dst = readb(src);
	65	src++;
	66	dst++;
	67	count--;
	68	if (count < 2) goto bytecopy;
	69	}
	70
	71	if ((unsigned long)src & 2) {
	72	(u16 )dst = __raw_readw(src);
	73	src += 2;
	74	dst += 2;
	75	count -= 2;
	76	}
	77
	78	while (count > 3) {
	79	(u32 )dst = __raw_readl(src);
	80	dst += 4;
	81	src += 4;
	82	count -= 4;
	83	}
	84
	85	shortcopy:
	86	while (count > 1) {
	87	(u16 )dst = __raw_readw(src);
	88	src += 2;
	89	dst += 2;
	90	count -= 2;
	91	}
	92
	93	bytecopy:
	94	while (count--) {
	95	(char )dst = readb(src);
	96	src++;
	97	dst++;
	98	}
	99	}
	100
	101	/* Sets a block of memory on a device to a given value.
	102	* Assumes the device can cope with 32-bit transfers. If it can't,
	103	* don't use this function.
	104	*/
	105	void memset_io(volatile void __iomem *addr, unsigned char val, int count)
	106	{
	107	u32 val32 = (val << 24) \| (val << 16) \| (val << 8) \| val;
	108	while ((unsigned long)addr & 3) {
	109	writeb(val, addr++);
	110	count--;
	111	}
	112	while (count > 3) {
	113	__raw_writel(val32, addr);
	114	addr += 4;
	115	count -= 4;
	116	}
	117	while (count--) {
	118	writeb(val, addr++);
	119	}
	120	}
	121
	122	/*
	123	* Read COUNT 8-bit bytes from port PORT into memory starting at
	124	* SRC.
	125	*/
	126	void insb (unsigned long port, void *dst, unsigned long count)
	127	{
	128	unsigned char *p;
	129
	130	p = (unsigned char *)dst;
	131
	132	while (((unsigned long)p) & 0x3) {
	133	if (!count)
	134	return;
	135	count--;
	136	*p = inb(port);
	137	p++;
	138	}
	139
	140	while (count >= 4) {
	141	unsigned int w;
	142	count -= 4;
	143	w = inb(port) << 24;
	144	w \|= inb(port) << 16;
	145	w \|= inb(port) << 8;
	146	w \|= inb(port);
	147	(unsigned int ) p = w;
	148	p += 4;
	149	}
	150
	151	while (count) {
	152	--count;
	153	*p = inb(port);
	154	p++;
	155	}
	156	}
	157
	158
	159	/*
	160	* Read COUNT 16-bit words from port PORT into memory starting at
	161	* SRC. SRC must be at least short aligned. This is used by the
	162	* IDE driver to read disk sectors. Performance is important, but
	163	* the interfaces seems to be slow: just using the inlined version
	164	* of the inw() breaks things.
	165	*/
	166	void insw (unsigned long port, void *dst, unsigned long count)
	167	{
	168	unsigned int l = 0, l2;
	169	unsigned char *p;
	170
	171	p = (unsigned char *)dst;
	172
	173	if (!count)
	174	return;
	175
	176	switch (((unsigned long)p) & 0x3)
	177	{
	178	case 0x00: /* Buffer 32-bit aligned */
	179	while (count>=2) {
	180
	181	count -= 2;
	182	l = cpu_to_le16(inw(port)) << 16;
	183	l \|= cpu_to_le16(inw(port));
	184	(unsigned int )p = l;
	185	p += 4;
	186	}
	187	if (count) {
	188	(unsigned short )p = cpu_to_le16(inw(port));
	189	}
	190	break;
	191
	192	case 0x02: /* Buffer 16-bit aligned */
	193	(unsigned short )p = cpu_to_le16(inw(port));
	194	p += 2;
	195	count--;
	196	while (count>=2) {
	197
	198	count -= 2;
	199	l = cpu_to_le16(inw(port)) << 16;
	200	l \|= cpu_to_le16(inw(port));
	201	(unsigned int )p = l;
	202	p += 4;
	203	}
	204	if (count) {
	205	(unsigned short )p = cpu_to_le16(inw(port));
	206	}
	207	break;
	208
	209	case 0x01: /* Buffer 8-bit aligned */
	210	case 0x03:
	211	/* I don't bother with 32bit transfers
	212	* in this case, 16bit will have to do -- DE */
	213	--count;
	214
	215	l = cpu_to_le16(inw(port));
	216	*p = l >> 8;
	217	p++;
	218	while (count--)
	219	{
	220	l2 = cpu_to_le16(inw(port));
	221	(unsigned short )p = (l & 0xff) << 8 \| (l2 >> 8);
	222	p += 2;
	223	l = l2;
	224	}
	225	*p = l & 0xff;
	226	break;
	227	}
	228	}
	229
	230
	231
	232	/*
	233	* Read COUNT 32-bit words from port PORT into memory starting at
	234	* SRC. Now works with any alignment in SRC. Performance is important,
	235	* but the interfaces seems to be slow: just using the inlined version
	236	* of the inl() breaks things.
	237	*/
	238	void insl (unsigned long port, void *dst, unsigned long count)
	239	{
	240	unsigned int l = 0, l2;
	241	unsigned char *p;
	242
	243	p = (unsigned char *)dst;
	244
	245	if (!count)
	246	return;
	247
	248	switch (((unsigned long) dst) & 0x3)
	249	{
	250	case 0x00: /* Buffer 32-bit aligned */
	251	while (count--)
	252	{
	253	(unsigned int )p = cpu_to_le32(inl(port));
	254	p += 4;
	255	}
	256	break;
	257
	258	case 0x02: /* Buffer 16-bit aligned */
	259	--count;
	260
	261	l = cpu_to_le32(inl(port));
	262	(unsigned short )p = l >> 16;
	263	p += 2;
	264
	265	while (count--)
	266	{
	267	l2 = cpu_to_le32(inl(port));
	268	(unsigned int )p = (l & 0xffff) << 16 \| (l2 >> 16);
	269	p += 4;
	270	l = l2;
	271	}
	272	(unsigned short )p = l & 0xffff;
	273	break;
	274	case 0x01: /* Buffer 8-bit aligned */
	275	--count;
	276
	277	l = cpu_to_le32(inl(port));
	278	(unsigned char )p = l >> 24;
	279	p++;
	280	(unsigned short )p = (l >> 8) & 0xffff;
	281	p += 2;
	282	while (count--)
	283	{
	284	l2 = cpu_to_le32(inl(port));
	285	(unsigned int )p = (l & 0xff) << 24 \| (l2 >> 8);
	286	p += 4;
	287	l = l2;
	288	}
	289	*p = l & 0xff;
	290	break;
	291	case 0x03: /* Buffer 8-bit aligned */
	292	--count;
	293
	294	l = cpu_to_le32(inl(port));
	295	*p = l >> 24;
	296	p++;
	297	while (count--)
	298	{
	299	l2 = cpu_to_le32(inl(port));
	300	(unsigned int )p = (l & 0xffffff) << 8 \| l2 >> 24;
	301	p += 4;
	302	l = l2;
	303	}
	304	(unsigned short )p = (l >> 8) & 0xffff;
	305	p += 2;
	306	*p = l & 0xff;
	307	break;
	308	}
	309	}
	310
	311
	312	/*
	313	* Like insb but in the opposite direction.
	314	* Don't worry as much about doing aligned memory transfers:
	315	* doing byte reads the "slow" way isn't nearly as slow as
	316	* doing byte writes the slow way (no r-m-w cycle).
	317	*/
	318	void outsb(unsigned long port, const void * src, unsigned long count)
	319	{
	320	const unsigned char *p;
	321
	322	p = (const unsigned char *)src;
	323	while (count) {
	324	count--;
	325	outb(*p, port);
	326	p++;
	327	}
	328	}
	329
	330	/*
	331	* Like insw but in the opposite direction. This is used by the IDE
	332	* driver to write disk sectors. Performance is important, but the
	333	* interfaces seems to be slow: just using the inlined version of the
	334	* outw() breaks things.
	335	*/
	336	void outsw (unsigned long port, const void *src, unsigned long count)
	337	{
	338	unsigned int l = 0, l2;
	339	const unsigned char *p;
	340
	341	p = (const unsigned char *)src;
	342
	343	if (!count)
	344	return;
	345
	346	switch (((unsigned long)p) & 0x3)
	347	{
	348	case 0x00: /* Buffer 32-bit aligned */
	349	while (count>=2) {
	350	count -= 2;
	351	l = (unsigned int )p;
	352	p += 4;
	353	outw(le16_to_cpu(l >> 16), port);
	354	outw(le16_to_cpu(l & 0xffff), port);
	355	}
	356	if (count) {
	357	outw(le16_to_cpu((unsigned short)p), port);
	358	}
	359	break;
	360
	361	case 0x02: /* Buffer 16-bit aligned */
	362
	363	outw(le16_to_cpu((unsigned short)p), port);
	364	p += 2;
	365	count--;
	366
	367	while (count>=2) {
	368	count -= 2;
	369	l = (unsigned int )p;
	370	p += 4;
	371	outw(le16_to_cpu(l >> 16), port);
	372	outw(le16_to_cpu(l & 0xffff), port);
	373	}
	374	if (count) {
	375	outw(le16_to_cpu((unsigned short )p), port);
	376	}
	377	break;
	378
	379	case 0x01: /* Buffer 8-bit aligned */
	380	/* I don't bother with 32bit transfers
	381	* in this case, 16bit will have to do -- DE */
	382
	383	l = *p << 8;
	384	p++;
	385	count--;
	386	while (count)
	387	{
	388	count--;
	389	l2 = (unsigned short )p;
	390	p += 2;
	391	outw(le16_to_cpu(l \| l2 >> 8), port);
	392	l = l2 << 8;
	393	}
	394	l2 = (unsigned char )p;
	395	outw (le16_to_cpu(l \| l2>>8), port);
	396	break;
	397
	398	}
	399	}
	400
	401
	402	/*
	403	* Like insl but in the opposite direction. This is used by the IDE
	404	* driver to write disk sectors. Works with any alignment in SRC.
	405	* Performance is important, but the interfaces seems to be slow:
	406	* just using the inlined version of the outl() breaks things.
	407	*/
	408	void outsl (unsigned long port, const void *src, unsigned long count)
	409	{
	410	unsigned int l = 0, l2;
	411	const unsigned char *p;
	412
	413	p = (const unsigned char *)src;
	414
	415	if (!count)
	416	return;
	417
	418	switch (((unsigned long)p) & 0x3)
	419	{
	420	case 0x00: /* Buffer 32-bit aligned */
	421	while (count--)
	422	{
	423	outl(le32_to_cpu((unsigned int )p), port);
	424	p += 4;
	425	}
	426	break;
	427
	428	case 0x02: /* Buffer 16-bit aligned */
	429	--count;
	430
	431	l = (unsigned short )p;
	432	p += 2;
	433
	434	while (count--)
	435	{
	436	l2 = (unsigned int )p;
	437	p += 4;
	438	outl (le32_to_cpu(l << 16 \| l2 >> 16), port);
	439	l = l2;
	440	}
	441	l2 = (unsigned short )p;
	442	outl (le32_to_cpu(l << 16 \| l2), port);
	443	break;
	444	case 0x01: /* Buffer 8-bit aligned */
	445	--count;
	446
	447	l = *p << 24;
	448	p++;
	449	l \|= (unsigned short )p << 8;
	450	p += 2;
	451
	452	while (count--)
	453	{
	454	l2 = (unsigned int )p;
	455	p += 4;
	456	outl (le32_to_cpu(l \| l2 >> 24), port);
	457	l = l2 << 8;
	458	}
	459	l2 = *p;
	460	outl (le32_to_cpu(l \| l2), port);
	461	break;
	462	case 0x03: /* Buffer 8-bit aligned */
	463	--count;
	464
	465	l = *p << 24;
	466	p++;
	467
	468	while (count--)
	469	{
	470	l2 = (unsigned int )p;
	471	p += 4;
	472	outl (le32_to_cpu(l \| l2 >> 8), port);
	473	l = l2 << 24;
	474	}
	475	l2 = (unsigned short )p << 16;
	476	p += 2;
	477	l2 \|= *p;
	478	outl (le32_to_cpu(l \| l2), port);
	479	break;
	480	}
	481	}
	482
	483	EXPORT_SYMBOL(insb);
	484	EXPORT_SYMBOL(insw);
	485	EXPORT_SYMBOL(insl);
	486	EXPORT_SYMBOL(outsb);
	487	EXPORT_SYMBOL(outsw);
	488	EXPORT_SYMBOL(outsl);