author		Zachary Amsden <zach@vmware.com>	2005-09-03 18:56:36 -0400
committer	Linus Torvalds <torvalds@evo.osdl.org>	2005-09-05 03:06:11 -0400
commit		4bb0d3ec3e5b1e9e2399cdc641b3b6521ac9cdaa (patch)
tree		5e8d7646f5c6a2cec990b6d591f230d496b20664
parent		2a0694d15d55d0deed928786a6393d5e45e37d76 (diff)
[PATCH] i386: inline asm cleanup
i386 Inline asm cleanup.  Use cr/dr accessor functions.

Also, a potential bugfix.  Also, some CR accessors really should be volatile.
Reads from CR0 (numeric state may change in an exception handler), writes to
CR4 (flipping CR4.TSD) and reads from CR2 (page fault) prevent instruction
re-ordering.  I did not add memory clobber to CR3 / CR4 / CR0 updates, as it
was not there to begin with, and in no case should kernel memory be clobbered,
except when doing a TLB flush, which already has memory clobber.

I noticed that page invalidation does not have a memory clobber.  I can't
find a bug as a result, but there is definitely a potential for a bug here:

#define __flush_tlb_single(addr) \
	__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
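For illustration only, a minimal sketch of how the macro quoted above could
carry a memory clobber to close that potential bug; this sketch is not part
of this commit, and the form upstream eventually adopted may differ:

/* Hypothetical illustration, not from this patch: the "memory" clobber
 * tells GCC it may not cache kernel memory contents across the INVLPG,
 * so ordinary loads and stores cannot be reordered around the page
 * invalidation. */
#define __flush_tlb_single(addr) \
	__asm__ __volatile__("invlpg %0" : : "m" (*(char *) (addr)) : "memory")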
-rw-r--r--  arch/i386/kernel/cpu/common.c            | 12
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.c  | 12
-rw-r--r--  arch/i386/kernel/cpu/cyrix.c             |  6
-rw-r--r--  arch/i386/kernel/efi.c                   |  4
-rw-r--r--  arch/i386/kernel/machine_kexec.c         |  8
-rw-r--r--  arch/i386/kernel/process.c               | 16
-rw-r--r--  arch/i386/kernel/smp.c                   |  2
-rw-r--r--  arch/i386/mm/fault.c                     |  6
-rw-r--r--  arch/i386/mm/pageattr.c                  |  2
-rw-r--r--  arch/i386/power/cpu.c                    | 16
-rw-r--r--  include/asm-i386/agp.h                   |  2
-rw-r--r--  include/asm-i386/bugs.h                  |  5
-rw-r--r--  include/asm-i386/processor.h             | 22
-rw-r--r--  include/asm-i386/system.h                | 28
-rw-r--r--  include/asm-i386/xor.h                   | 26
15 files changed, 84 insertions, 83 deletions
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 4553ffd94b1..361f2e7ccb1 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
 	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
 	/* Clear all 6 debug registers: */
-
-#define CD(register) set_debugreg(0, register)
-
-	CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+	set_debugreg(0, 0);
+	set_debugreg(0, 1);
+	set_debugreg(0, 2);
+	set_debugreg(0, 3);
+	set_debugreg(0, 6);
+	set_debugreg(0, 7);
 
 	/*
 	 * Force FPU initialization:
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 04e3563da4f..bf02b5026e6 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -64,8 +64,6 @@ static int dont_scale_voltage;
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
 
-#define __hlt() __asm__ __volatile__("hlt": : :"memory")
-
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
 	outb(0xFE,0x21);	/* TMR0 only */
 	outb(0xFF,0x80);	/* delay */
 
-	local_irq_enable();
-
-	__hlt();
+	safe_halt();
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
-	__hlt();
+	halt();
 
 	local_irq_disable();
 
@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	bcr2.bits.CLOCKMUL = clock_ratio_index;
 	local_irq_disable();
 	wrmsrl (MSR_VIA_BCR2, bcr2.val);
-	local_irq_enable();
-
-	__hlt();
+	safe_halt();
 
 	/* Disable software clock multiplier */
 	rdmsrl (MSR_VIA_BCR2, bcr2.val);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ba4b01138c8..ff87cc22b32 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	cr0 = 0x20000000;
-	__asm__("movl %%cr0,%%eax\n\t"
-		"orl %0,%%eax\n\t"
-		"movl %%eax,%%cr0\n"
-		: : "r" (cr0)
-		:"ax");
+	write_cr0(read_cr0() | cr0);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 }
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 850648ae830..921fdb15fc9 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
 	 * directory. If I have PSE, I just need to duplicate one entry in
 	 * page directory.
 	 */
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		efi_bak_pg_dir_pointer[0].pgd =
@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void)
 	cpu_gdt_descr[0].address =
 		(unsigned long) __va(cpu_gdt_descr[0].address);
 	__asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
-	__asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+	cr4 = read_cr4();
 
 	if (cr4 & X86_CR4_PSE) {
 		swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index cb699a2aa1f..f19f6d34bcb 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -17,13 +17,7 @@
 #include <asm/apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
-
-static inline unsigned long read_cr3(void)
-{
-	unsigned long cr3;
-	asm volatile("movl %%cr3,%0": "=r"(cr3));
-	return cr3;
-}
+#include <asm/system.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index e3f362e8af5..761d4ed47ef 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
313 printk(" DS: %04x ES: %04x\n", 313 printk(" DS: %04x ES: %04x\n",
314 0xffff & regs->xds,0xffff & regs->xes); 314 0xffff & regs->xds,0xffff & regs->xes);
315 315
316 __asm__("movl %%cr0, %0": "=r" (cr0)); 316 cr0 = read_cr0();
317 __asm__("movl %%cr2, %0": "=r" (cr2)); 317 cr2 = read_cr2();
318 __asm__("movl %%cr3, %0": "=r" (cr3)); 318 cr3 = read_cr3();
319 /* This could fault if %cr4 does not exist */ 319 if (current_cpu_data.x86 > 4) {
320 __asm__("1: movl %%cr4, %0 \n" 320 cr4 = read_cr4();
321 "2: \n" 321 }
322 ".section __ex_table,\"a\" \n"
323 ".long 1b,2b \n"
324 ".previous \n"
325 : "=r" (cr4): "0" (0));
326 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); 322 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
327 show_trace(NULL, &regs->esp); 323 show_trace(NULL, &regs->esp);
328} 324}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index cec4bde6716..48b55db3680 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
 	local_irq_disable();
 	disable_local_APIC();
 	if (cpu_data[smp_processor_id()].hlt_works_ok)
-		for(;;) __asm__("hlt");
+		for(;;) halt();
 	for (;;);
 }
 
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 61d9e34af5a..411b8500ad1 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -233,7 +233,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	int write, si_code;
 
 	/* get the address */
-	__asm__("movl %%cr2,%0":"=r" (address));
+	address = read_cr2();
 
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 					SIGSEGV) == NOTIFY_STOP)
@@ -453,7 +453,7 @@ no_context:
453 printk(" at virtual address %08lx\n",address); 453 printk(" at virtual address %08lx\n",address);
454 printk(KERN_ALERT " printing eip:\n"); 454 printk(KERN_ALERT " printing eip:\n");
455 printk("%08lx\n", regs->eip); 455 printk("%08lx\n", regs->eip);
456 asm("movl %%cr3,%0":"=r" (page)); 456 page = read_cr3();
457 page = ((unsigned long *) __va(page))[address >> 22]; 457 page = ((unsigned long *) __va(page))[address >> 22];
458 printk(KERN_ALERT "*pde = %08lx\n", page); 458 printk(KERN_ALERT "*pde = %08lx\n", page);
459 /* 459 /*
@@ -526,7 +526,7 @@ vmalloc_fault:
 	pmd_t *pmd, *pmd_k;
 	pte_t *pte_k;
 
-	asm("movl %%cr3,%0":"=r" (pgd_paddr));
+	pgd_paddr = read_cr3();
 	pgd = index + (pgd_t *)__va(pgd_paddr);
 	pgd_k = init_mm.pgd + index;
 
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index cb3da6baa70..bce06a79eaf 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -62,7 +62,7 @@ static void flush_kernel_map(void *dummy)
 {
 	/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
 	if (boot_cpu_data.x86_model >= 4)
-		asm volatile("wbinvd":::"memory");
+		wbinvd();
 	/* Flush all to work around Errata in early athlons regarding
 	 * large page flushing.
 	 */
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index c547c1af6fa..4e19c43e095 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -57,10 +57,10 @@ void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * control registers
 	 */
-	asm volatile ("movl %%cr0, %0" : "=r" (ctxt->cr0));
-	asm volatile ("movl %%cr2, %0" : "=r" (ctxt->cr2));
-	asm volatile ("movl %%cr3, %0" : "=r" (ctxt->cr3));
-	asm volatile ("movl %%cr4, %0" : "=r" (ctxt->cr4));
+	ctxt->cr0 = read_cr0();
+	ctxt->cr2 = read_cr2();
+	ctxt->cr3 = read_cr3();
+	ctxt->cr4 = read_cr4();
 }
 
 void save_processor_state(void)
@@ -109,10 +109,10 @@ void __restore_processor_state(struct saved_context *ctxt)
 	/*
 	 * control registers
 	 */
-	asm volatile ("movl %0, %%cr4" :: "r" (ctxt->cr4));
-	asm volatile ("movl %0, %%cr3" :: "r" (ctxt->cr3));
-	asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2));
-	asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0));
+	write_cr4(ctxt->cr4);
+	write_cr3(ctxt->cr3);
+	write_cr2(ctxt->cr2);
+	write_cr2(ctxt->cr0);
 
 	/*
 	 * now restore the descriptor tables to their proper values
diff --git a/include/asm-i386/agp.h b/include/asm-i386/agp.h
index b82f5f3ab88..9075083bab7 100644
--- a/include/asm-i386/agp.h
+++ b/include/asm-i386/agp.h
@@ -19,7 +19,7 @@ int unmap_page_from_agp(struct page *page);
 /* Could use CLFLUSH here if the cpu supports it. But then it would
    need to be called for each cacheline of the whole page so it may not be
    worth it. Would need a page for it. */
-#define flush_agp_cache() asm volatile("wbinvd":::"memory")
+#define flush_agp_cache() wbinvd()
 
 /* Convert a physical address to an address suitable for the GART. */
 #define phys_to_gart(x) (x)
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 6789fc275da..ea54540638d 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -118,7 +118,10 @@ static void __init check_hlt(void)
118 printk("disabled\n"); 118 printk("disabled\n");
119 return; 119 return;
120 } 120 }
121 __asm__ __volatile__("hlt ; hlt ; hlt ; hlt"); 121 halt();
122 halt();
123 halt();
124 halt();
122 printk("OK.\n"); 125 printk("OK.\n");
123} 126}
124 127
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index d0d8b016009..7e17d3b4f65 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -203,9 +203,7 @@ static inline unsigned int cpuid_edx(unsigned int op)
 	return edx;
 }
 
-#define load_cr3(pgdir) \
-	asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir)))
-
+#define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
  * Intel CPU features in CR4
@@ -232,22 +230,20 @@ extern unsigned long mmu_cr4_features;
 
 static inline void set_in_cr4 (unsigned long mask)
 {
+	unsigned cr4;
 	mmu_cr4_features |= mask;
-	__asm__("movl %%cr4,%%eax\n\t"
-		"orl %0,%%eax\n\t"
-		"movl %%eax,%%cr4\n"
-		: : "irg" (mask)
-		:"ax");
+	cr4 = read_cr4();
+	cr4 |= mask;
+	write_cr4(cr4);
 }
 
 static inline void clear_in_cr4 (unsigned long mask)
 {
+	unsigned cr4;
 	mmu_cr4_features &= ~mask;
-	__asm__("movl %%cr4,%%eax\n\t"
-		"andl %0,%%eax\n\t"
-		"movl %%eax,%%cr4\n"
-		: : "irg" (~mask)
-		:"ax");
+	cr4 = read_cr4();
+	cr4 &= ~mask;
+	write_cr4(cr4);
 }
 
 /*
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 3db717a244f..8048a5e018c 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -107,13 +107,33 @@ static inline unsigned long _get_base(char * addr)
 #define clts() __asm__ __volatile__ ("clts")
 #define read_cr0() ({ \
 	unsigned int __dummy; \
-	__asm__( \
+	__asm__ __volatile__( \
 		"movl %%cr0,%0\n\t" \
 		:"=r" (__dummy)); \
 	__dummy; \
 })
 #define write_cr0(x) \
-	__asm__("movl %0,%%cr0": :"r" (x));
+	__asm__ __volatile__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr2() ({ \
+	unsigned int __dummy; \
+	__asm__ __volatile__( \
+		"movl %%cr2,%0\n\t" \
+		:"=r" (__dummy)); \
+	__dummy; \
+})
+#define write_cr2(x) \
+	__asm__ __volatile__("movl %0,%%cr2": :"r" (x));
+
+#define read_cr3() ({ \
+	unsigned int __dummy; \
+	__asm__ ( \
+		"movl %%cr3,%0\n\t" \
+		:"=r" (__dummy)); \
+	__dummy; \
+})
+#define write_cr3(x) \
+	__asm__ __volatile__("movl %0,%%cr3": :"r" (x));
 
 #define read_cr4() ({ \
 	unsigned int __dummy; \
@@ -123,7 +143,7 @@ static inline unsigned long _get_base(char * addr)
 	__dummy; \
 })
 #define write_cr4(x) \
-	__asm__("movl %0,%%cr4": :"r" (x));
+	__asm__ __volatile__("movl %0,%%cr4": :"r" (x));
 #define stts() write_cr0(8 | read_cr0())
 
 #endif /* __KERNEL__ */
@@ -447,6 +467,8 @@ struct alt_instr {
 #define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
 /* used in the idle loop; sti takes one instruction cycle to complete */
 #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt() __asm__ __volatile__("hlt": : :"memory")
 
 #define irqs_disabled() \
 ({ \
diff --git a/include/asm-i386/xor.h b/include/asm-i386/xor.h
index f80e2dbe1b5..23c86cef3b2 100644
--- a/include/asm-i386/xor.h
+++ b/include/asm-i386/xor.h
@@ -535,14 +535,14 @@ static struct xor_block_template xor_block_p5_mmx = {
 
 #define XMMS_SAVE do { \
 	preempt_disable(); \
+	cr0 = read_cr0(); \
+	clts(); \
 	__asm__ __volatile__ ( \
-		"movl %%cr0,%0		;\n\t" \
-		"clts			;\n\t" \
-		"movups %%xmm0,(%1)	;\n\t" \
-		"movups %%xmm1,0x10(%1)	;\n\t" \
-		"movups %%xmm2,0x20(%1)	;\n\t" \
-		"movups %%xmm3,0x30(%1)	;\n\t" \
-		: "=&r" (cr0) \
+		"movups %%xmm0,(%0)	;\n\t" \
+		"movups %%xmm1,0x10(%0)	;\n\t" \
+		"movups %%xmm2,0x20(%0)	;\n\t" \
+		"movups %%xmm3,0x30(%0)	;\n\t" \
+		: \
 		: "r" (xmm_save) \
 		: "memory"); \
 } while(0)
@@ -550,14 +550,14 @@ static struct xor_block_template xor_block_p5_mmx = {
 #define XMMS_RESTORE do { \
 	__asm__ __volatile__ ( \
 		"sfence			;\n\t" \
-		"movups (%1),%%xmm0	;\n\t" \
-		"movups 0x10(%1),%%xmm1	;\n\t" \
-		"movups 0x20(%1),%%xmm2	;\n\t" \
-		"movups 0x30(%1),%%xmm3	;\n\t" \
-		"movl %0,%%cr0		;\n\t" \
+		"movups (%0),%%xmm0	;\n\t" \
+		"movups 0x10(%0),%%xmm1	;\n\t" \
+		"movups 0x20(%0),%%xmm2	;\n\t" \
+		"movups 0x30(%0),%%xmm3	;\n\t" \
 		: \
-		: "r" (cr0), "r" (xmm_save) \
+		: "r" (xmm_save) \
 		: "memory"); \
+	write_cr0(cr0); \
 	preempt_enable(); \
 } while(0)
 