-rw-r--r--  arch/mips/au1000/Kconfig               |    1
-rw-r--r--  arch/mips/au1000/common/irq.c          |  251
-rw-r--r--  arch/mips/au1000/common/power.c        |    7
-rw-r--r--  arch/mips/au1000/pb1200/irqmap.c       |    2
-rw-r--r--  arch/mips/configs/mtx1_defconfig       |    2
-rw-r--r--  arch/mips/kernel/head.S                |    4
-rw-r--r--  arch/mips/kernel/time.c                |   47
-rw-r--r--  arch/mips/kernel/traps.c               |  164
-rw-r--r--  arch/mips/sgi-ip22/ip22-time.c         |    9
-rw-r--r--  arch/mips/sibyte/bcm1480/time.c        |    2
-rw-r--r--  arch/mips/sibyte/sb1250/time.c         |    8
-rw-r--r--  arch/x86/kernel/alternative.c          |    4
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c       |   14
-rw-r--r--  arch/x86/kernel/entry_32.S             |    2
-rw-r--r--  arch/x86/kernel/paravirt_32.c          |  224
-rw-r--r--  arch/x86/kernel/vmi_32.c               |  201
-rw-r--r--  arch/x86/mm/init_32.c                  |   22
-rw-r--r--  arch/x86/xen/enlighten.c               |  232
-rw-r--r--  arch/x86/xen/mmu.c                     |  144
-rw-r--r--  arch/x86/xen/multicalls.c              |   52
-rw-r--r--  arch/x86/xen/multicalls.h              |    5
-rw-r--r--  arch/x86/xen/smp.c                     |   14
-rw-r--r--  arch/x86/xen/time.c                    |    6
-rw-r--r--  arch/x86/xen/xen-ops.h                 |   10
-rw-r--r--  block/ll_rw_blk.c                      |   10
-rw-r--r--  drivers/char/hvc_lguest.c              |    2
-rw-r--r--  drivers/lguest/core.c                  |    6
-rw-r--r--  drivers/lguest/lguest.c                |  152
-rw-r--r--  drivers/lguest/lguest_bus.c            |    2
-rw-r--r--  drivers/scsi/scsi_lib.c                |    2
-rw-r--r--  include/asm-mips/mach-au1x00/au1000.h  |  624
-rw-r--r--  include/asm-mips/mach-db1x00/db1200.h  |   49
-rw-r--r--  include/asm-mips/mach-pb1x00/pb1200.h  |   52
-rw-r--r--  include/asm-x86/paravirt.h             |  487
-rw-r--r--  include/asm-x86/pgtable-3level-defs.h  |    2
-rw-r--r--  include/xen/interface/vcpu.h           |    5
-rw-r--r--  mm/Kconfig                             |    1
37 files changed, 1627 insertions(+), 1194 deletions(-)
diff --git a/arch/mips/au1000/Kconfig b/arch/mips/au1000/Kconfig
index 29c95d97217d..a23d4154da01 100644
--- a/arch/mips/au1000/Kconfig
+++ b/arch/mips/au1000/Kconfig
@@ -137,6 +137,7 @@ config SOC_AU1200
 config SOC_AU1X00
 	bool
 	select 64BIT_PHYS_ADDR
+	select IRQ_CPU
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_APM_EMULATION
diff --git a/arch/mips/au1000/common/irq.c b/arch/mips/au1000/common/irq.c
index c00f308fd505..59e932a928d2 100644
--- a/arch/mips/au1000/common/irq.c
+++ b/arch/mips/au1000/common/irq.c
@@ -1,11 +1,10 @@
 /*
- * BRIEF MODULE DESCRIPTION
- *	Au1000 interrupt routines.
- *
  * Copyright 2001 MontaVista Software Inc.
  * Author: MontaVista Software, Inc.
  *		ppopov@mvista.com or source@mvista.com
  *
+ * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2 of the License, or (at your
@@ -32,6 +31,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 
+#include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
 #include <asm/mach-au1x00/au1000.h>
 #ifdef CONFIG_MIPS_PB1000
@@ -44,7 +44,7 @@
 #define EXT_INTC1_REQ1 5 /* IP 5 */
 #define MIPS_TIMER_IP  7 /* IP 7 */
 
-void (*board_init_irq)(void);
+void (*board_init_irq)(void) __initdata = NULL;
 
 static DEFINE_SPINLOCK(irq_lock);
 
@@ -134,12 +134,14 @@ void restore_au1xxx_intctl(void)
 
 inline void local_enable_irq(unsigned int irq_nr)
 {
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
-		au_writel(1 << (irq_nr - 32), IC1_MASKSET);
-		au_writel(1 << (irq_nr - 32), IC1_WAKESET);
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	if (bit >= 32) {
+		au_writel(1 << (bit - 32), IC1_MASKSET);
+		au_writel(1 << (bit - 32), IC1_WAKESET);
 	} else {
-		au_writel(1 << irq_nr, IC0_MASKSET);
-		au_writel(1 << irq_nr, IC0_WAKESET);
+		au_writel(1 << bit, IC0_MASKSET);
+		au_writel(1 << bit, IC0_WAKESET);
 	}
 	au_sync();
 }
@@ -147,12 +149,14 @@ inline void local_enable_irq(unsigned int irq_nr)
 
 inline void local_disable_irq(unsigned int irq_nr)
 {
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
-		au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
-		au_writel(1 << (irq_nr - 32), IC1_WAKECLR);
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	if (bit >= 32) {
+		au_writel(1 << (bit - 32), IC1_MASKCLR);
+		au_writel(1 << (bit - 32), IC1_WAKECLR);
 	} else {
-		au_writel(1 << irq_nr, IC0_MASKCLR);
-		au_writel(1 << irq_nr, IC0_WAKECLR);
+		au_writel(1 << bit, IC0_MASKCLR);
+		au_writel(1 << bit, IC0_WAKECLR);
 	}
 	au_sync();
 }
@@ -160,12 +164,14 @@ inline void local_disable_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_rise_edge_irq(unsigned int irq_nr)
 {
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
-		au_writel(1 << (irq_nr - 32), IC1_RISINGCLR);
-		au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	if (bit >= 32) {
+		au_writel(1 << (bit - 32), IC1_RISINGCLR);
+		au_writel(1 << (bit - 32), IC1_MASKCLR);
 	} else {
-		au_writel(1 << irq_nr, IC0_RISINGCLR);
-		au_writel(1 << irq_nr, IC0_MASKCLR);
+		au_writel(1 << bit, IC0_RISINGCLR);
+		au_writel(1 << bit, IC0_MASKCLR);
 	}
 	au_sync();
 }
@@ -173,12 +179,14 @@ static inline void mask_and_ack_rise_edge_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_fall_edge_irq(unsigned int irq_nr)
 {
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
-		au_writel(1 << (irq_nr - 32), IC1_FALLINGCLR);
-		au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	if (bit >= 32) {
+		au_writel(1 << (bit - 32), IC1_FALLINGCLR);
+		au_writel(1 << (bit - 32), IC1_MASKCLR);
 	} else {
-		au_writel(1 << irq_nr, IC0_FALLINGCLR);
-		au_writel(1 << irq_nr, IC0_MASKCLR);
+		au_writel(1 << bit, IC0_FALLINGCLR);
+		au_writel(1 << bit, IC0_MASKCLR);
 	}
 	au_sync();
 }
@@ -186,17 +194,20 @@ static inline void mask_and_ack_fall_edge_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_either_edge_irq(unsigned int irq_nr)
 {
-	/* This may assume that we don't get interrupts from
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	/*
+	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
-		au_writel(1 << (irq_nr - 32), IC1_FALLINGCLR);
-		au_writel(1 << (irq_nr - 32), IC1_RISINGCLR);
-		au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+	if (bit >= 32) {
+		au_writel(1 << (bit - 32), IC1_FALLINGCLR);
+		au_writel(1 << (bit - 32), IC1_RISINGCLR);
+		au_writel(1 << (bit - 32), IC1_MASKCLR);
 	} else {
-		au_writel(1 << irq_nr, IC0_FALLINGCLR);
-		au_writel(1 << irq_nr, IC0_RISINGCLR);
-		au_writel(1 << irq_nr, IC0_MASKCLR);
+		au_writel(1 << bit, IC0_FALLINGCLR);
+		au_writel(1 << bit, IC0_RISINGCLR);
+		au_writel(1 << bit, IC0_MASKCLR);
 	}
 	au_sync();
 }
@@ -213,10 +224,8 @@ static inline void mask_and_ack_level_irq(unsigned int irq_nr)
 		au_sync();
 	}
 #endif
-	return;
 }
 
-
 static void end_irq(unsigned int irq_nr)
 {
 	if (!(irq_desc[irq_nr].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
@@ -341,114 +350,118 @@ void startup_match20_interrupt(irq_handler_t handler)
 }
 #endif
 
-static void setup_local_irq(unsigned int irq_nr, int type, int int_req)
+static void __init setup_local_irq(unsigned int irq_nr, int type, int int_req)
 {
-	if (irq_nr > AU1000_MAX_INTR) return;
+	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+	if (irq_nr > AU1000_MAX_INTR)
+		return;
+
 	/* Config2[n], Config1[n], Config0[n] */
-	if (irq_nr > AU1000_LAST_INTC0_INT) {
+	if (bit >= 32) {
 		switch (type) {
 		case INTC_INT_RISE_EDGE: /* 0:0:1 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+			au_writel(1 << (bit - 32), IC1_CFG2CLR);
+			au_writel(1 << (bit - 32), IC1_CFG1CLR);
+			au_writel(1 << (bit - 32), IC1_CFG0SET);
 			set_irq_chip(irq_nr, &rise_edge_irq_type);
 			break;
 		case INTC_INT_FALL_EDGE: /* 0:1:0 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
+			au_writel(1 << (bit - 32), IC1_CFG2CLR);
+			au_writel(1 << (bit - 32), IC1_CFG1SET);
+			au_writel(1 << (bit - 32), IC1_CFG0CLR);
 			set_irq_chip(irq_nr, &fall_edge_irq_type);
 			break;
 		case INTC_INT_RISE_AND_FALL_EDGE: /* 0:1:1 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+			au_writel(1 << (bit - 32), IC1_CFG2CLR);
+			au_writel(1 << (bit - 32), IC1_CFG1SET);
+			au_writel(1 << (bit - 32), IC1_CFG0SET);
 			set_irq_chip(irq_nr, &either_edge_irq_type);
 			break;
 		case INTC_INT_HIGH_LEVEL: /* 1:0:1 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG2SET);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+			au_writel(1 << (bit - 32), IC1_CFG2SET);
+			au_writel(1 << (bit - 32), IC1_CFG1CLR);
+			au_writel(1 << (bit - 32), IC1_CFG0SET);
 			set_irq_chip(irq_nr, &level_irq_type);
 			break;
 		case INTC_INT_LOW_LEVEL: /* 1:1:0 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG2SET);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
+			au_writel(1 << (bit - 32), IC1_CFG2SET);
+			au_writel(1 << (bit - 32), IC1_CFG1SET);
+			au_writel(1 << (bit - 32), IC1_CFG0CLR);
 			set_irq_chip(irq_nr, &level_irq_type);
 			break;
 		case INTC_INT_DISABLED: /* 0:0:0 */
-			au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
+			au_writel(1 << (bit - 32), IC1_CFG0CLR);
+			au_writel(1 << (bit - 32), IC1_CFG1CLR);
+			au_writel(1 << (bit - 32), IC1_CFG2CLR);
 			break;
 		default: /* disable the interrupt */
 			printk(KERN_WARNING "unexpected int type %d (irq %d)\n",
 			       type, irq_nr);
-			au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-			au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
+			au_writel(1 << (bit - 32), IC1_CFG0CLR);
+			au_writel(1 << (bit - 32), IC1_CFG1CLR);
+			au_writel(1 << (bit - 32), IC1_CFG2CLR);
 			return;
 		}
 		if (int_req)	/* assign to interrupt request 1 */
-			au_writel(1 << (irq_nr - 32), IC1_ASSIGNCLR);
+			au_writel(1 << (bit - 32), IC1_ASSIGNCLR);
 		else		/* assign to interrupt request 0 */
-			au_writel(1 << (irq_nr - 32), IC1_ASSIGNSET);
-		au_writel(1 << (irq_nr - 32), IC1_SRCSET);
-		au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
-		au_writel(1 << (irq_nr - 32), IC1_WAKECLR);
+			au_writel(1 << (bit - 32), IC1_ASSIGNSET);
+		au_writel(1 << (bit - 32), IC1_SRCSET);
+		au_writel(1 << (bit - 32), IC1_MASKCLR);
+		au_writel(1 << (bit - 32), IC1_WAKECLR);
 	} else {
 		switch (type) {
 		case INTC_INT_RISE_EDGE: /* 0:0:1 */
-			au_writel(1 << irq_nr, IC0_CFG2CLR);
-			au_writel(1 << irq_nr, IC0_CFG1CLR);
-			au_writel(1 << irq_nr, IC0_CFG0SET);
+			au_writel(1 << bit, IC0_CFG2CLR);
+			au_writel(1 << bit, IC0_CFG1CLR);
+			au_writel(1 << bit, IC0_CFG0SET);
 			set_irq_chip(irq_nr, &rise_edge_irq_type);
 			break;
 		case INTC_INT_FALL_EDGE: /* 0:1:0 */
-			au_writel(1 << irq_nr, IC0_CFG2CLR);
-			au_writel(1 << irq_nr, IC0_CFG1SET);
-			au_writel(1 << irq_nr, IC0_CFG0CLR);
+			au_writel(1 << bit, IC0_CFG2CLR);
+			au_writel(1 << bit, IC0_CFG1SET);
+			au_writel(1 << bit, IC0_CFG0CLR);
 			set_irq_chip(irq_nr, &fall_edge_irq_type);
 			break;
 		case INTC_INT_RISE_AND_FALL_EDGE: /* 0:1:1 */
-			au_writel(1 << irq_nr, IC0_CFG2CLR);
-			au_writel(1 << irq_nr, IC0_CFG1SET);
-			au_writel(1 << irq_nr, IC0_CFG0SET);
+			au_writel(1 << bit, IC0_CFG2CLR);
+			au_writel(1 << bit, IC0_CFG1SET);
+			au_writel(1 << bit, IC0_CFG0SET);
 			set_irq_chip(irq_nr, &either_edge_irq_type);
 			break;
 		case INTC_INT_HIGH_LEVEL: /* 1:0:1 */
-			au_writel(1 << irq_nr, IC0_CFG2SET);
-			au_writel(1 << irq_nr, IC0_CFG1CLR);
-			au_writel(1 << irq_nr, IC0_CFG0SET);
+			au_writel(1 << bit, IC0_CFG2SET);
+			au_writel(1 << bit, IC0_CFG1CLR);
+			au_writel(1 << bit, IC0_CFG0SET);
 			set_irq_chip(irq_nr, &level_irq_type);
 			break;
 		case INTC_INT_LOW_LEVEL: /* 1:1:0 */
-			au_writel(1 << irq_nr, IC0_CFG2SET);
-			au_writel(1 << irq_nr, IC0_CFG1SET);
-			au_writel(1 << irq_nr, IC0_CFG0CLR);
+			au_writel(1 << bit, IC0_CFG2SET);
+			au_writel(1 << bit, IC0_CFG1SET);
+			au_writel(1 << bit, IC0_CFG0CLR);
 			set_irq_chip(irq_nr, &level_irq_type);
 			break;
 		case INTC_INT_DISABLED: /* 0:0:0 */
-			au_writel(1 << irq_nr, IC0_CFG0CLR);
-			au_writel(1 << irq_nr, IC0_CFG1CLR);
-			au_writel(1 << irq_nr, IC0_CFG2CLR);
+			au_writel(1 << bit, IC0_CFG0CLR);
+			au_writel(1 << bit, IC0_CFG1CLR);
+			au_writel(1 << bit, IC0_CFG2CLR);
 			break;
 		default: /* disable the interrupt */
 			printk(KERN_WARNING "unexpected int type %d (irq %d)\n",
 			       type, irq_nr);
-			au_writel(1 << irq_nr, IC0_CFG0CLR);
-			au_writel(1 << irq_nr, IC0_CFG1CLR);
-			au_writel(1 << irq_nr, IC0_CFG2CLR);
+			au_writel(1 << bit, IC0_CFG0CLR);
+			au_writel(1 << bit, IC0_CFG1CLR);
+			au_writel(1 << bit, IC0_CFG2CLR);
 			return;
 		}
 		if (int_req)	/* assign to interrupt request 1 */
-			au_writel(1 << irq_nr, IC0_ASSIGNCLR);
+			au_writel(1 << bit, IC0_ASSIGNCLR);
 		else		/* assign to interrupt request 0 */
-			au_writel(1 << irq_nr, IC0_ASSIGNSET);
-		au_writel(1 << irq_nr, IC0_SRCSET);
-		au_writel(1 << irq_nr, IC0_MASKCLR);
-		au_writel(1 << irq_nr, IC0_WAKECLR);
+			au_writel(1 << bit, IC0_ASSIGNSET);
+		au_writel(1 << bit, IC0_SRCSET);
+		au_writel(1 << bit, IC0_MASKCLR);
+		au_writel(1 << bit, IC0_WAKECLR);
 	}
 	au_sync();
 }
@@ -461,8 +474,8 @@ static void setup_local_irq(unsigned int irq_nr, int type, int int_req)
 
 static void intc0_req0_irqdispatch(void)
 {
-	int irq = 0;
 	static unsigned long intc0_req0;
+	unsigned int bit;
 
 	intc0_req0 |= au_readl(IC0_REQ0INT);
 
@@ -481,25 +494,25 @@ static void intc0_req0_irqdispatch(void)
 		return;
 	}
 #endif
-	irq = ffs(intc0_req0);
-	intc0_req0 &= ~(1 << irq);
-	do_IRQ(irq);
+	bit = ffs(intc0_req0);
+	intc0_req0 &= ~(1 << bit);
+	do_IRQ(MIPS_CPU_IRQ_BASE + bit);
 }
 
 
 static void intc0_req1_irqdispatch(void)
 {
-	int irq = 0;
 	static unsigned long intc0_req1;
+	unsigned int bit;
 
 	intc0_req1 |= au_readl(IC0_REQ1INT);
 
 	if (!intc0_req1)
 		return;
 
-	irq = ffs(intc0_req1);
-	intc0_req1 &= ~(1 << irq);
-	do_IRQ(irq);
+	bit = ffs(intc0_req1);
+	intc0_req1 &= ~(1 << bit);
+	do_IRQ(bit);
 }
 
 
@@ -509,43 +522,41 @@ static void intc0_req1_irqdispatch(void)
  */
 static void intc1_req0_irqdispatch(void)
 {
-	int irq = 0;
 	static unsigned long intc1_req0;
+	unsigned int bit;
 
 	intc1_req0 |= au_readl(IC1_REQ0INT);
 
 	if (!intc1_req0)
 		return;
 
-	irq = ffs(intc1_req0);
-	intc1_req0 &= ~(1 << irq);
-	irq += 32;
-	do_IRQ(irq);
+	bit = ffs(intc1_req0);
+	intc1_req0 &= ~(1 << bit);
+	do_IRQ(MIPS_CPU_IRQ_BASE + 32 + bit);
 }
 
 
 static void intc1_req1_irqdispatch(void)
 {
-	int irq = 0;
 	static unsigned long intc1_req1;
+	unsigned int bit;
 
 	intc1_req1 |= au_readl(IC1_REQ1INT);
 
 	if (!intc1_req1)
 		return;
 
-	irq = ffs(intc1_req1);
-	intc1_req1 &= ~(1 << irq);
-	irq += 32;
-	do_IRQ(irq);
+	bit = ffs(intc1_req1);
+	intc1_req1 &= ~(1 << bit);
+	do_IRQ(MIPS_CPU_IRQ_BASE + 32 + bit);
 }
 
 asmlinkage void plat_irq_dispatch(void)
 {
-	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+	unsigned int pending = read_c0_status() & read_c0_cause();
 
 	if (pending & CAUSEF_IP7)
-		do_IRQ(63);
+		do_IRQ(MIPS_CPU_IRQ_BASE + 7);
 	else if (pending & CAUSEF_IP2)
 		intc0_req0_irqdispatch();
 	else if (pending & CAUSEF_IP3)
@@ -561,17 +572,15 @@ asmlinkage void plat_irq_dispatch(void)
 void __init arch_init_irq(void)
 {
 	int i;
-	unsigned long cp0_status;
 	struct au1xxx_irqmap *imp;
 	extern struct au1xxx_irqmap au1xxx_irq_map[];
 	extern struct au1xxx_irqmap au1xxx_ic0_map[];
 	extern int au1xxx_nr_irqs;
 	extern int au1xxx_ic0_nr_irqs;
 
-	cp0_status = read_c0_status();
-
-	/* Initialize interrupt controllers to a safe state.
-	 */
+	/*
+	 * Initialize interrupt controllers to a safe state.
+	 */
 	au_writel(0xffffffff, IC0_CFG0CLR);
 	au_writel(0xffffffff, IC0_CFG1CLR);
 	au_writel(0xffffffff, IC0_CFG2CLR);
@@ -594,16 +603,20 @@ void __init arch_init_irq(void)
 	au_writel(0xffffffff, IC1_RISINGCLR);
 	au_writel(0x00000000, IC1_TESTBIT);
 
-	/* Initialize IC0, which is fixed per processor.
-	 */
+	mips_cpu_irq_init();
+
+	/*
+	 * Initialize IC0, which is fixed per processor.
+	 */
 	imp = au1xxx_ic0_map;
 	for (i = 0; i < au1xxx_ic0_nr_irqs; i++) {
 		setup_local_irq(imp->im_irq, imp->im_type, imp->im_request);
 		imp++;
 	}
 
-	/* Now set up the irq mapping for the board.
-	 */
+	/*
+	 * Now set up the irq mapping for the board.
+	 */
 	imp = au1xxx_irq_map;
 	for (i = 0; i < au1xxx_nr_irqs; i++) {
 		setup_local_irq(imp->im_irq, imp->im_type, imp->im_request);
@@ -615,5 +628,5 @@ void __init arch_init_irq(void)
 	/* Board specific IRQ initialization.
 	*/
 	if (board_init_irq)
-		(*board_init_irq)();
+		board_init_irq();
 }
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index 6f57f72a7d57..54047d69b820 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -403,9 +403,9 @@ static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
 	}
 
 
-	/* We don't want _any_ interrupts other than
-	 * match20. Otherwise our au1000_calibrate_delay()
-	 * calculation will be off, potentially a lot.
+	/*
+	 * We don't want _any_ interrupts other than match20. Otherwise our
+	 * au1000_calibrate_delay() calculation will be off, potentially a lot.
 	 */
 	intc0_mask = save_local_and_disable(0);
 	intc1_mask = save_local_and_disable(1);
@@ -414,6 +414,7 @@ static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
 	au1000_calibrate_delay();
 	restore_local_and_enable(0, intc0_mask);
 	restore_local_and_enable(1, intc1_mask);
+
 	return retval;
 }
 
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 3bee274445f5..5f48b0603796 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -74,7 +74,7 @@ irqreturn_t pb1200_cascade_handler( int irq, void *dev_id)
 	bcsr->int_status = bisr;
 	for( ; bisr; bisr &= (bisr-1) )
 	{
-		extirq_nr = PB1200_INT_BEGIN + au_ffs(bisr);
+		extirq_nr = PB1200_INT_BEGIN + ffs(bisr);
 		/* Ack and dispatch IRQ */
 		do_IRQ(extirq_nr);
 	}
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 0280ef389d8d..b536d7c63790 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -3021,7 +3021,7 @@ CONFIG_MAGIC_SYSRQ=y
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_DEBUG_KERNEL is not set
-# CONFIG_CROSSCOMPILE is not set
+CONFIG_CROSSCOMPILE=y
 CONFIG_CMDLINE=""
 CONFIG_SYS_SUPPORTS_KGDB=y
 
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index e46782b0ebc8..bf164a562acb 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -140,7 +140,7 @@
 
 EXPORT(_stext)
 
-#ifndef CONFIG_BOOT_RAW
+#ifdef CONFIG_BOOT_RAW
 	/*
 	 * Give us a fighting chance of running if execution beings at the
 	 * kernel load address.  This is needed because this platform does
@@ -149,6 +149,8 @@ EXPORT(_stext)
 	__INIT
 #endif
 
+	__INIT_REFOK
+
 NESTED(kernel_entry, 16, sp)			# kernel entry point
 
 	kernel_entry_setup			# cpu specific setup
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 05b365167a09..e4b5e647b142 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -391,6 +391,50 @@ static void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
+/*
+ * FIXME: This doesn't hold for the relocated E9000 compare interrupt.
+ */
+static int c0_compare_int_pending(void)
+{
+	return (read_c0_cause() >> cp0_compare_irq) & 0x100;
+}
+
+static int c0_compare_int_usable(void)
+{
+	const unsigned int delta = 0x300000;
+	unsigned int cnt;
+
+	/*
+	 * IP7 already pending?  Try to clear it by acking the timer.
+	 */
+	if (c0_compare_int_pending()) {
+		write_c0_compare(read_c0_compare());
+		irq_disable_hazard();
+		if (c0_compare_int_pending())
+			return 0;
+	}
+
+	cnt = read_c0_count();
+	cnt += delta;
+	write_c0_compare(cnt);
+
+	while ((long)(read_c0_count() - cnt) <= 0)
+		;	/* Wait for expiry  */
+
+	if (!c0_compare_int_pending())
+		return 0;
+
+	write_c0_compare(read_c0_compare());
+	irq_disable_hazard();
+	if (c0_compare_int_pending())
+		return 0;
+
+	/*
+	 * Feels like a real count / compare timer.
+	 */
+	return 1;
+}
+
 void __cpuinit mips_clockevent_init(void)
 {
 	uint64_t mips_freq = mips_hpt_frequency;
@@ -412,6 +456,9 @@ void __cpuinit mips_clockevent_init(void)
 		return;
 #endif
 
+	if (!c0_compare_int_usable())
+		return;
+
 	cd = &per_cpu(mips_clockevent_device, cpu);
 
 	cd->name		= "MIPS";
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9c0c478d71ac..bbf01b81a4ff 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -9,9 +9,10 @@
  * Copyright (C) 1999 Silicon Graphics, Inc.
  * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2000, 01 MIPS Technologies, Inc.
- * Copyright (C) 2002, 2003, 2004, 2005 Maciej W. Rozycki
+ * Copyright (C) 2002, 2003, 2004, 2005, 2007 Maciej W. Rozycki
  */
 #include <linux/bug.h>
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -410,7 +411,7 @@ asmlinkage void do_be(struct pt_regs *regs)
 }
 
 /*
- * ll/sc emulation
+ * ll/sc, rdhwr, sync emulation
  */
 
 #define OPCODE 0xfc000000
@@ -419,9 +420,11 @@ asmlinkage void do_be(struct pt_regs *regs)
 #define OFFSET 0x0000ffff
 #define LL     0xc0000000
 #define SC     0xe0000000
+#define SPEC0  0x00000000
 #define SPEC3  0x7c000000
 #define RD     0x0000f800
 #define FUNC   0x0000003f
+#define SYNC   0x0000000f
 #define RDHWR  0x0000003b
 
 /*
@@ -432,11 +435,10 @@ unsigned long ll_bit;
 
 static struct task_struct *ll_task = NULL;
 
-static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
+static inline int simulate_ll(struct pt_regs *regs, unsigned int opcode)
 {
 	unsigned long value, __user *vaddr;
 	long offset;
-	int signal = 0;
 
 	/*
 	 * analyse the ll instruction that just caused a ri exception
@@ -451,14 +453,10 @@ static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
 	vaddr = (unsigned long __user *)
 	        ((unsigned long)(regs->regs[(opcode & BASE) >> 21]) + offset);
 
-	if ((unsigned long)vaddr & 3) {
-		signal = SIGBUS;
-		goto sig;
-	}
-	if (get_user(value, vaddr)) {
-		signal = SIGSEGV;
-		goto sig;
-	}
+	if ((unsigned long)vaddr & 3)
+		return SIGBUS;
+	if (get_user(value, vaddr))
+		return SIGSEGV;
 
 	preempt_disable();
 
@@ -471,22 +469,16 @@ static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
 
 	preempt_enable();
 
-	compute_return_epc(regs);
-
 	regs->regs[(opcode & RT) >> 16] = value;
 
-	return;
-
-sig:
-	force_sig(signal, current);
+	return 0;
 }
 
-static inline void simulate_sc(struct pt_regs *regs, unsigned int opcode)
+static inline int simulate_sc(struct pt_regs *regs, unsigned int opcode)
 {
 	unsigned long __user *vaddr;
 	unsigned long reg;
 	long offset;
-	int signal = 0;
 
 	/*
 	 * analyse the sc instruction that just caused a ri exception
@@ -502,34 +494,25 @@ static inline void simulate_sc(struct pt_regs *regs, unsigned int opcode)
 	        ((unsigned long)(regs->regs[(opcode & BASE) >> 21]) + offset);
 	reg = (opcode & RT) >> 16;
 
-	if ((unsigned long)vaddr & 3) {
-		signal = SIGBUS;
-		goto sig;
-	}
+	if ((unsigned long)vaddr & 3)
+		return SIGBUS;
 
 	preempt_disable();
 
 	if (ll_bit == 0 || ll_task != current) {
-		compute_return_epc(regs);
 		regs->regs[reg] = 0;
 		preempt_enable();
-		return;
+		return 0;
 	}
 
 	preempt_enable();
 
-	if (put_user(regs->regs[reg], vaddr)) {
-		signal = SIGSEGV;
-		goto sig;
-	}
+	if (put_user(regs->regs[reg], vaddr))
+		return SIGSEGV;
 
-	compute_return_epc(regs);
 	regs->regs[reg] = 1;
 
-	return;
-
-sig:
-	force_sig(signal, current);
+	return 0;
 }
 
 /*
@@ -539,27 +522,14 @@ sig:
  * few processors such as NEC's VR4100 throw reserved instruction exceptions
  * instead, so we're doing the emulation thing in both exception handlers.
  */
-static inline int simulate_llsc(struct pt_regs *regs)
+static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
-	unsigned int opcode;
-
-	if (get_user(opcode, (unsigned int __user *) exception_epc(regs)))
-		goto out_sigsegv;
-
-	if ((opcode & OPCODE) == LL) {
-		simulate_ll(regs, opcode);
-		return 0;
-	}
-	if ((opcode & OPCODE) == SC) {
-		simulate_sc(regs, opcode);
-		return 0;
-	}
-
-	return -EFAULT;			/* Strange things going on ... */
+	if ((opcode & OPCODE) == LL)
+		return simulate_ll(regs, opcode);
+	if ((opcode & OPCODE) == SC)
+		return simulate_sc(regs, opcode);
 
-out_sigsegv:
-	force_sig(SIGSEGV, current);
-	return -EFAULT;
+	return -1;			/* Must be something else ... */
 }
 
 /*
@@ -567,16 +537,9 @@ out_sigsegv:
  * registers not implemented in hardware.  The only current use of this
  * is the thread area pointer.
  */
-static inline int simulate_rdhwr(struct pt_regs *regs)
+static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 {
 	struct thread_info *ti = task_thread_info(current);
-	unsigned int opcode;
-
-	if (get_user(opcode, (unsigned int __user *) exception_epc(regs)))
-		goto out_sigsegv;
-
-	if (unlikely(compute_return_epc(regs)))
-		return -EFAULT;
 
 	if ((opcode & OPCODE) == SPEC3 && (opcode & FUNC) == RDHWR) {
 		int rd = (opcode & RD) >> 11;
@@ -586,16 +549,20 @@ static inline int simulate_rdhwr(struct pt_regs *regs)
 			regs->regs[rt] = ti->tp_value;
 			return 0;
 		default:
-			return -EFAULT;
+			return -1;
 		}
 	}
 
 	/* Not ours. */
-	return -EFAULT;
+	return -1;
+}
 
-out_sigsegv:
-	force_sig(SIGSEGV, current);
-	return -EFAULT;
+static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
+{
+	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC)
+		return 0;
+
+	return -1;			/* Must be something else ... */
 }
 
 asmlinkage void do_ov(struct pt_regs *regs)
@@ -767,16 +734,35 @@ out_sigsegv:
 
 asmlinkage void do_ri(struct pt_regs *regs)
 {
-	die_if_kernel("Reserved instruction in kernel code", regs);
+	unsigned int __user *epc = (unsigned int __user *)exception_epc(regs);
+	unsigned long old_epc = regs->cp0_epc;
+	unsigned int opcode = 0;
+	int status = -1;
 
-	if (!cpu_has_llsc)
-		if (!simulate_llsc(regs))
-			return;
+	die_if_kernel("Reserved instruction in kernel code", regs);
 
-	if (!simulate_rdhwr(regs))
+	if (unlikely(compute_return_epc(regs) < 0))
 		return;
 
-	force_sig(SIGILL, current);
+	if (unlikely(get_user(opcode, epc) < 0))
+		status = SIGSEGV;
+
+	if (!cpu_has_llsc && status < 0)
+		status = simulate_llsc(regs, opcode);
+
+	if (status < 0)
+		status = simulate_rdhwr(regs, opcode);
+
+	if (status < 0)
+		status = simulate_sync(regs, opcode);
+
+	if (status < 0)
+		status = SIGILL;
+
+	if (unlikely(status > 0)) {
+		regs->cp0_epc = old_epc;		/* Undo skip-over.  */
+		force_sig(status, current);
+	}
 }
 
 /*
@@ -808,7 +794,11 @@ static void mt_ase_fp_affinity(void)
 
 asmlinkage void do_cpu(struct pt_regs *regs)
 {
+	unsigned int __user *epc;
+	unsigned long old_epc;
+	unsigned int opcode;
 	unsigned int cpid;
+	int status;
 
 	die_if_kernel("do_cpu invoked from kernel context!", regs);
 
@@ -816,14 +806,32 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 
 	switch (cpid) {
 	case 0:
-		if (!cpu_has_llsc)
-			if (!simulate_llsc(regs))
-				return;
+		epc = (unsigned int __user *)exception_epc(regs);
+		old_epc = regs->cp0_epc;
+		opcode = 0;
+		status = -1;
 
-		if (!simulate_rdhwr(regs))
+		if (unlikely(compute_return_epc(regs) < 0))
 			return;
 
-		break;
+		if (unlikely(get_user(opcode, epc) < 0))
+			status = SIGSEGV;
+
+		if (!cpu_has_llsc && status < 0)
+			status = simulate_llsc(regs, opcode);
+
+		if (status < 0)
+			status = simulate_rdhwr(regs, opcode);
+
+		if (status < 0)
+			status = SIGILL;
+
+		if (unlikely(status > 0)) {
+			regs->cp0_epc = old_epc;	/* Undo skip-over.  */
+			force_sig(status, current);
+		}
+
+		return;
 
 	case 1:
 		if (used_math())	/* Using the FPU again.  */
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 9b9bffd2e8fb..10e505491655 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -192,12 +192,3 @@ void indy_8254timer_irq(void)
 		ArcEnterInteractiveMode();
 	irq_exit();
 }
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	/* over-write the handler, we use our own way */
-	irq->handler = no_action;
-
-	/* setup irqaction */
-	setup_irq(SGI_TIMER_IRQ, irq);
-}
diff --git a/arch/mips/sibyte/bcm1480/time.c b/arch/mips/sibyte/bcm1480/time.c
index 40d7126cd5bf..5b4bfbbb5a24 100644
--- a/arch/mips/sibyte/bcm1480/time.c
+++ b/arch/mips/sibyte/bcm1480/time.c
@@ -84,7 +84,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
84 void __iomem *timer_cfg, *timer_init; 84 void __iomem *timer_cfg, *timer_init;
85 85
86 timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)); 86 timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
87 timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)); 87 timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
88 88
89 switch (mode) { 89 switch (mode) {
90 case CLOCK_EVT_MODE_PERIODIC: 90 case CLOCK_EVT_MODE_PERIODIC:
diff --git a/arch/mips/sibyte/sb1250/time.c b/arch/mips/sibyte/sb1250/time.c
index 38199ad8fc54..fe11fed8e0d7 100644
--- a/arch/mips/sibyte/sb1250/time.c
+++ b/arch/mips/sibyte/sb1250/time.c
@@ -83,7 +83,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
 	void __iomem *timer_cfg, *timer_init;
 
 	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
 	switch(mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -111,7 +111,7 @@ sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
 	void __iomem *timer_cfg, *timer_init;
 
 	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
 	__raw_writeq(0, timer_cfg);
 	__raw_writeq(delta, timer_init);
@@ -155,7 +155,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
 	void __iomem *timer_cfg, *timer_init;
 
 	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -183,7 +183,7 @@ sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
 	void __iomem *timer_cfg, *timer_init;
 
 	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
 	__raw_writeq(0, timer_cfg);
 	__raw_writeq(delta, timer_init);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a3ae8e6c8b3b..3bd2688bd443 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -390,8 +390,8 @@ void apply_paravirt(struct paravirt_patch_site *start,
 		BUG_ON(p->len > MAX_PATCH_LEN);
 		/* prep the buffer with the original instructions */
 		memcpy(insnbuf, p->instr, p->len);
-		used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
+		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
 					  (unsigned long)p->instr, p->len);
 
 		BUG_ON(used > p->len);
 
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 8029742c0fc1..f1b7cdda82b3 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -116,12 +116,14 @@ void foo(void)
 
 #ifdef CONFIG_PARAVIRT
 	BLANK();
-	OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
-	OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
-	OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
-	OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
-	OFFSET(PARAVIRT_iret, paravirt_ops, iret);
-	OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+	OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
 #ifdef CONFIG_XEN
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8099fea0a72f..dc7f938e5015 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -437,7 +437,7 @@ ldt_ss:
 	 * is still available to implement the setting of the high
 	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
 	 */
-	cmpl $0, paravirt_ops+PARAVIRT_enabled
+	cmpl $0, pv_info+PARAVIRT_enabled
 	jne restore_nocheck
 #endif
 
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 739cfb207dd7..6a80d67c2121 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -42,32 +42,33 @@ void _paravirt_nop(void)
 static void __init default_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-	       paravirt_ops.name);
+	       pv_info.name);
 }
 
 char *memory_setup(void)
 {
-	return paravirt_ops.memory_setup();
+	return pv_init_ops.memory_setup();
 }
 
 /* Simple instruction patching code. */
-#define DEF_NATIVE(name, code)					\
-	extern const char start_##name[], end_##name[];		\
-	asm("start_" #name ": " code "; end_" #name ":")
+#define DEF_NATIVE(ops, name, code)					\
+	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
+	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
 
-DEF_NATIVE(irq_disable, "cli");
-DEF_NATIVE(irq_enable, "sti");
-DEF_NATIVE(restore_fl, "push %eax; popf");
-DEF_NATIVE(save_fl, "pushf; pop %eax");
-DEF_NATIVE(iret, "iret");
-DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
-DEF_NATIVE(read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(clts, "clts");
-DEF_NATIVE(read_tsc, "rdtsc");
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
+DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
 
-DEF_NATIVE(ud2a, "ud2a");
+/* Undefined instruction for dealing with missing ops pointers. */
+static const unsigned char ud2a[] = { 0x0f, 0x0b };
 
 static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 			     unsigned long addr, unsigned len)
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 	unsigned ret;
 
 	switch(type) {
-#define SITE(x)	case PARAVIRT_PATCH(x):	start = start_##x; end = end_##x; goto patch_site
-	SITE(irq_disable);
-	SITE(irq_enable);
-	SITE(restore_fl);
-	SITE(save_fl);
-	SITE(iret);
-	SITE(irq_enable_sysexit);
-	SITE(read_cr2);
-	SITE(read_cr3);
-	SITE(write_cr3);
-	SITE(clts);
-	SITE(read_tsc);
+#define SITE(ops, x)						\
+	case PARAVIRT_PATCH(ops.x):				\
+		start = start_##ops##_##x;			\
+		end = end_##ops##_##x;				\
+		goto patch_site
+
+	SITE(pv_irq_ops, irq_disable);
+	SITE(pv_irq_ops, irq_enable);
+	SITE(pv_irq_ops, restore_fl);
+	SITE(pv_irq_ops, save_fl);
+	SITE(pv_cpu_ops, iret);
+	SITE(pv_cpu_ops, irq_enable_sysexit);
+	SITE(pv_mmu_ops, read_cr2);
+	SITE(pv_mmu_ops, read_cr3);
+	SITE(pv_mmu_ops, write_cr3);
+	SITE(pv_cpu_ops, clts);
+	SITE(pv_cpu_ops, read_tsc);
 #undef SITE
 
 	patch_site:
 		ret = paravirt_patch_insns(ibuf, len, start, end);
 		break;
 
-	case PARAVIRT_PATCH(make_pgd):
-	case PARAVIRT_PATCH(make_pte):
-	case PARAVIRT_PATCH(pgd_val):
-	case PARAVIRT_PATCH(pte_val):
-#ifdef CONFIG_X86_PAE
-	case PARAVIRT_PATCH(make_pmd):
-	case PARAVIRT_PATCH(pmd_val):
-#endif
-		/* These functions end up returning exactly what
-		   they're passed, in the same registers. */
-		ret = paravirt_patch_nop();
-		break;
-
 	default:
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
 	return 5;
 }
 
-unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 			    unsigned long addr, unsigned len)
 {
 	struct branch *b = insnbuf;
@@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
 	return 5;
 }
 
+/* Neat trick to map patch type back to the call within the
+ * corresponding structure. */
+static void *get_call_destination(u8 type)
+{
+	struct paravirt_patch_template tmpl = {
+		.pv_init_ops = pv_init_ops,
+		.pv_time_ops = pv_time_ops,
+		.pv_cpu_ops = pv_cpu_ops,
+		.pv_irq_ops = pv_irq_ops,
+		.pv_apic_ops = pv_apic_ops,
+		.pv_mmu_ops = pv_mmu_ops,
+	};
+	return *((void **)&tmpl + type);
+}
+
 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 				unsigned long addr, unsigned len)
 {
-	void *opfunc = *((void **)&paravirt_ops + type);
+	void *opfunc = get_call_destination(type);
 	unsigned ret;
 
 	if (opfunc == NULL)
 		/* If there's no function, patch it with a ud2a (BUG) */
-		ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
+		ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
 	else if (opfunc == paravirt_nop)
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
-	else if (type == PARAVIRT_PATCH(iret) ||
-		 type == PARAVIRT_PATCH(irq_enable_sysexit))
+	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
 		/* If operation requires a jmp, then jmp */
-		ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
+		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
 		/* Otherwise call the function; assume target could
 		   clobber any caller-save reg */
@@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 
 void init_IRQ(void)
 {
-	paravirt_ops.init_IRQ();
+	pv_irq_ops.init_IRQ();
 }
 
 static void native_flush_tlb(void)
@@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void);
 
 static int __init print_banner(void)
 {
-	paravirt_ops.banner();
+	pv_init_ops.banner();
 	return 0;
 }
 core_initcall(print_banner);
@@ -273,47 +281,96 @@ int paravirt_disable_iospace(void)
 	return ret;
 }
 
-struct paravirt_ops paravirt_ops = {
+static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+
+static inline void enter_lazy(enum paravirt_lazy_mode mode)
+{
+	BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+	BUG_ON(preemptible());
+
+	x86_write_percpu(paravirt_lazy_mode, mode);
+}
+
+void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+{
+	BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
+	BUG_ON(preemptible());
+
+	x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+}
+
+void paravirt_enter_lazy_mmu(void)
+{
+	enter_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_leave_lazy_mmu(void)
+{
+	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_enter_lazy_cpu(void)
+{
+	enter_lazy(PARAVIRT_LAZY_CPU);
+}
+
+void paravirt_leave_lazy_cpu(void)
+{
+	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+}
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
+{
+	return x86_read_percpu(paravirt_lazy_mode);
+}
+
+struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
 	.shared_kernel_pmd = 1,	/* Only used when CONFIG_X86_PAE is set */
+};
 
-	.patch = native_patch,
+struct pv_init_ops pv_init_ops = {
+	.patch = native_patch,
 	.banner = default_banner,
 	.arch_setup = paravirt_nop,
 	.memory_setup = machine_specific_memory_setup,
+};
+
+struct pv_time_ops pv_time_ops = {
+	.time_init = hpet_time_init,
 	.get_wallclock = native_get_wallclock,
 	.set_wallclock = native_set_wallclock,
-	.time_init = hpet_time_init,
+	.sched_clock = native_sched_clock,
+	.get_cpu_khz = native_calculate_cpu_khz,
+};
+
+struct pv_irq_ops pv_irq_ops = {
 	.init_IRQ = native_init_IRQ,
+	.save_fl = native_save_fl,
+	.restore_fl = native_restore_fl,
+	.irq_disable = native_irq_disable,
+	.irq_enable = native_irq_enable,
+	.safe_halt = native_safe_halt,
+	.halt = native_halt,
+};
 
+struct pv_cpu_ops pv_cpu_ops = {
 	.cpuid = native_cpuid,
 	.get_debugreg = native_get_debugreg,
 	.set_debugreg = native_set_debugreg,
 	.clts = native_clts,
 	.read_cr0 = native_read_cr0,
 	.write_cr0 = native_write_cr0,
-	.read_cr2 = native_read_cr2,
-	.write_cr2 = native_write_cr2,
-	.read_cr3 = native_read_cr3,
-	.write_cr3 = native_write_cr3,
 	.read_cr4 = native_read_cr4,
 	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = native_write_cr4,
-	.save_fl = native_save_fl,
-	.restore_fl = native_restore_fl,
-	.irq_disable = native_irq_disable,
-	.irq_enable = native_irq_enable,
-	.safe_halt = native_safe_halt,
-	.halt = native_halt,
 	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr_safe,
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
-	.sched_clock = native_sched_clock,
-	.get_cpu_khz = native_calculate_cpu_khz,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
 	.load_gdt = native_load_gdt,
@@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = {
 	.write_idt_entry = write_dt_entry,
 	.load_esp0 = native_load_esp0,
 
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+	.iret = native_iret,
+
 	.set_iopl_mask = native_set_iopl_mask,
 	.io_delay = native_io_delay,
 
+	.lazy_mode = {
+		.enter = paravirt_nop,
+		.leave = paravirt_nop,
+	},
+};
+
+struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = native_apic_write,
 	.apic_write_atomic = native_apic_write_atomic,
@@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = {
 	.setup_secondary_clock = setup_secondary_APIC_clock,
 	.startup_ipi_hook = paravirt_nop,
 #endif
-	.set_lazy_mode = paravirt_nop,
+};
 
+struct pv_mmu_ops pv_mmu_ops = {
 	.pagetable_setup_start = native_pagetable_setup_start,
 	.pagetable_setup_done = native_pagetable_setup_done,
 
+	.read_cr2 = native_read_cr2,
+	.write_cr2 = native_write_cr2,
+	.read_cr3 = native_read_cr3,
+	.write_cr3 = native_write_cr3,
+
 	.flush_tlb_user = native_flush_tlb,
 	.flush_tlb_kernel = native_flush_tlb_global,
 	.flush_tlb_single = native_flush_tlb_single,
@@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = {
381 .make_pte = native_make_pte, 454 .make_pte = native_make_pte,
382 .make_pgd = native_make_pgd, 455 .make_pgd = native_make_pgd,
383 456
384 .irq_enable_sysexit = native_irq_enable_sysexit,
385 .iret = native_iret,
386
387 .dup_mmap = paravirt_nop, 457 .dup_mmap = paravirt_nop,
388 .exit_mmap = paravirt_nop, 458 .exit_mmap = paravirt_nop,
389 .activate_mm = paravirt_nop, 459 .activate_mm = paravirt_nop,
460
461 .lazy_mode = {
462 .enter = paravirt_nop,
463 .leave = paravirt_nop,
464 },
390}; 465};
391 466
392EXPORT_SYMBOL(paravirt_ops); 467EXPORT_SYMBOL_GPL(pv_time_ops);
468EXPORT_SYMBOL_GPL(pv_cpu_ops);
469EXPORT_SYMBOL_GPL(pv_mmu_ops);
470EXPORT_SYMBOL_GPL(pv_apic_ops);
471EXPORT_SYMBOL_GPL(pv_info);
472EXPORT_SYMBOL (pv_irq_ops);
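
The hunk above is the heart of the refactor: the monolithic ops table becomes
pv_info plus one structure per functional area, with only pv_irq_ops exported
non-GPL. A minimal sketch of the resulting dispatch pattern, with every name
and body reduced to a stand-in (none of this is the kernel's real definition):

#include <stdio.h>

struct pv_irq_ops {
	void (*irq_disable)(void);
	void (*irq_enable)(void);
};
struct pv_cpu_ops {
	void (*clts)(void);
};

static void native_irq_disable(void) { puts("cli"); }
static void native_irq_enable(void)  { puts("sti"); }
static void native_clts(void)        { puts("clts"); }

struct pv_irq_ops pv_irq_ops = {
	.irq_disable = native_irq_disable,
	.irq_enable  = native_irq_enable,
};
struct pv_cpu_ops pv_cpu_ops = { .clts = native_clts };

/* A backend overrides one group at a time, leaving the rest native. */
static void xen_irq_disable(void) { puts("hypercall: mask event delivery"); }

int main(void)
{
	pv_irq_ops.irq_disable = xen_irq_disable; /* pv_cpu_ops untouched */
	pv_irq_ops.irq_disable();
	pv_cpu_ops.clts();
	return 0;
}
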
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 18673e0f193b..f02bad68abaa 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
134 unsigned long eip, unsigned len) 134 unsigned long eip, unsigned len)
135{ 135{
136 switch (type) { 136 switch (type) {
137 case PARAVIRT_PATCH(irq_disable): 137 case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
138 return patch_internal(VMI_CALL_DisableInterrupts, len, 138 return patch_internal(VMI_CALL_DisableInterrupts, len,
139 insns, eip); 139 insns, eip);
140 case PARAVIRT_PATCH(irq_enable): 140 case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
141 return patch_internal(VMI_CALL_EnableInterrupts, len, 141 return patch_internal(VMI_CALL_EnableInterrupts, len,
142 insns, eip); 142 insns, eip);
143 case PARAVIRT_PATCH(restore_fl): 143 case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
144 return patch_internal(VMI_CALL_SetInterruptMask, len, 144 return patch_internal(VMI_CALL_SetInterruptMask, len,
145 insns, eip); 145 insns, eip);
146 case PARAVIRT_PATCH(save_fl): 146 case PARAVIRT_PATCH(pv_irq_ops.save_fl):
147 return patch_internal(VMI_CALL_GetInterruptMask, len, 147 return patch_internal(VMI_CALL_GetInterruptMask, len,
148 insns, eip); 148 insns, eip);
149 case PARAVIRT_PATCH(iret): 149 case PARAVIRT_PATCH(pv_cpu_ops.iret):
150 return patch_internal(VMI_CALL_IRET, len, insns, eip); 150 return patch_internal(VMI_CALL_IRET, len, insns, eip);
151 case PARAVIRT_PATCH(irq_enable_sysexit): 151 case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
152 return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); 152 return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
153 default: 153 default:
154 break; 154 break;
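
The case labels above now spell a member path (pv_irq_ops.irq_disable) instead
of a bare field name. PARAVIRT_PATCH() turns that path into a stable word
offset by laying every group out back to back in one template; a
self-contained model of the trick (the template layout here is invented for
illustration):

#include <stddef.h>
#include <stdio.h>

struct pv_irq_ops { void (*save_fl)(void); void (*irq_disable)(void); };
struct pv_cpu_ops { void (*iret)(void); };

/* All groups concatenated, so every op keeps one unique patch index. */
struct paravirt_patch_template {
	struct pv_irq_ops pv_irq_ops;
	struct pv_cpu_ops pv_cpu_ops;
};

#define PARAVIRT_PATCH(x) \
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))

int main(void)
{
	printf("irq_disable slot: %zu\n",
	       PARAVIRT_PATCH(pv_irq_ops.irq_disable));
	printf("iret slot:        %zu\n",
	       PARAVIRT_PATCH(pv_cpu_ops.iret));
	return 0;
}
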
@@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
552} 552}
553#endif 553#endif
554 554
555static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) 555static void vmi_enter_lazy_cpu(void)
556{ 556{
557 static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); 557 paravirt_enter_lazy_cpu();
558 558 vmi_ops.set_lazy_mode(2);
559 if (!vmi_ops.set_lazy_mode) 559}
560 return;
561 560
562 /* Modes should never nest or overlap */ 561static void vmi_enter_lazy_mmu(void)
563 BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE || 562{
564 mode == PARAVIRT_LAZY_FLUSH)); 563 paravirt_enter_lazy_mmu();
564 vmi_ops.set_lazy_mode(1);
565}
565 566
566 if (mode == PARAVIRT_LAZY_FLUSH) { 567static void vmi_leave_lazy(void)
567 vmi_ops.set_lazy_mode(0); 568{
568 vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode)); 569 paravirt_leave_lazy(paravirt_get_lazy_mode());
569 } else { 570 vmi_ops.set_lazy_mode(0);
570 vmi_ops.set_lazy_mode(mode);
571 __get_cpu_var(lazy_mode) = mode;
572 }
573} 571}
574 572
575static inline int __init check_vmi_rom(struct vrom_header *rom) 573static inline int __init check_vmi_rom(struct vrom_header *rom)
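
vmi_set_lazy_mode()'s per-cpu bookkeeping and nesting checks move into common
code; the new VMI hooks above only forward to shared helpers and then poke the
ROM with the mode number. One plausible shape for those helpers, collapsed to
a single-cpu model (the kernel's versions use per-cpu storage and BUG_ON):

#include <assert.h>

enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

static enum paravirt_lazy_mode lazy_mode; /* really one per cpu */

static void paravirt_enter_lazy(enum paravirt_lazy_mode mode)
{
	assert(lazy_mode == PARAVIRT_LAZY_NONE); /* modes never nest */
	lazy_mode = mode;
}

void paravirt_enter_lazy_mmu(void) { paravirt_enter_lazy(PARAVIRT_LAZY_MMU); }
void paravirt_enter_lazy_cpu(void) { paravirt_enter_lazy(PARAVIRT_LAZY_CPU); }

void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
{
	assert(lazy_mode == mode); /* leave must match the active mode */
	lazy_mode = PARAVIRT_LAZY_NONE;
}

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
	return lazy_mode;
}
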
@@ -690,9 +688,9 @@ do { \
690 reloc = call_vrom_long_func(vmi_rom, get_reloc, \ 688 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
691 VMI_CALL_##vmicall); \ 689 VMI_CALL_##vmicall); \
692 if (rel->type == VMI_RELOCATION_CALL_REL) \ 690 if (rel->type == VMI_RELOCATION_CALL_REL) \
693 paravirt_ops.opname = (void *)rel->eip; \ 691 opname = (void *)rel->eip; \
694 else if (rel->type == VMI_RELOCATION_NOP) \ 692 else if (rel->type == VMI_RELOCATION_NOP) \
695 paravirt_ops.opname = (void *)vmi_nop; \ 693 opname = (void *)vmi_nop; \
696 else if (rel->type != VMI_RELOCATION_NONE) \ 694 else if (rel->type != VMI_RELOCATION_NONE) \
697 printk(KERN_WARNING "VMI: Unknown relocation " \ 695 printk(KERN_WARNING "VMI: Unknown relocation " \
698 "type %d for " #vmicall"\n",\ 696 "type %d for " #vmicall"\n",\
@@ -712,7 +710,7 @@ do { \
712 VMI_CALL_##vmicall); \ 710 VMI_CALL_##vmicall); \
713 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ 711 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
714 if (rel->type == VMI_RELOCATION_CALL_REL) { \ 712 if (rel->type == VMI_RELOCATION_CALL_REL) { \
715 paravirt_ops.opname = wrapper; \ 713 opname = wrapper; \
716 vmi_ops.cache = (void *)rel->eip; \ 714 vmi_ops.cache = (void *)rel->eip; \
717 } \ 715 } \
718} while (0) 716} while (0)
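
After this change the first argument of para_fill()/para_wrap() is a complete
lvalue rather than a name pasted onto paravirt_ops, which is what lets one
macro serve all of the split structures. A toy expansion, with the ROM
relocation machinery replaced by a fixed function pointer:

#include <stdio.h>

struct pv_cpu_ops { void (*clts)(void); } pv_cpu_ops;

/* Was "paravirt_ops.opname = ..."; the caller now spells the full path. */
#define para_fill(opname, addr) do { opname = (addr); } while (0)

static void rom_clts(void) { puts("clts via ROM entry point"); }

int main(void)
{
	para_fill(pv_cpu_ops.clts, rom_clts); /* was para_fill(clts, CLTS) */
	pv_cpu_ops.clts();
	return 0;
}
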
@@ -732,11 +730,11 @@ static inline int __init activate_vmi(void)
732 } 730 }
733 savesegment(cs, kernel_cs); 731 savesegment(cs, kernel_cs);
734 732
735 paravirt_ops.paravirt_enabled = 1; 733 pv_info.paravirt_enabled = 1;
736 paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; 734 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
735 pv_info.name = "vmi";
737 736
738 paravirt_ops.patch = vmi_patch; 737 pv_init_ops.patch = vmi_patch;
739 paravirt_ops.name = "vmi";
740 738
741 /* 739 /*
742 * Many of these operations are ABI compatible with VMI. 740 * Many of these operations are ABI compatible with VMI.
@@ -754,26 +752,26 @@ static inline int __init activate_vmi(void)
754 */ 752 */
755 753
756 /* CPUID is special, so very special it gets wrapped like a present */ 754 /* CPUID is special, so very special it gets wrapped like a present */
757 para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); 755 para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
758 756
759 para_fill(clts, CLTS); 757 para_fill(pv_cpu_ops.clts, CLTS);
760 para_fill(get_debugreg, GetDR); 758 para_fill(pv_cpu_ops.get_debugreg, GetDR);
761 para_fill(set_debugreg, SetDR); 759 para_fill(pv_cpu_ops.set_debugreg, SetDR);
762 para_fill(read_cr0, GetCR0); 760 para_fill(pv_cpu_ops.read_cr0, GetCR0);
763 para_fill(read_cr2, GetCR2); 761 para_fill(pv_mmu_ops.read_cr2, GetCR2);
764 para_fill(read_cr3, GetCR3); 762 para_fill(pv_mmu_ops.read_cr3, GetCR3);
765 para_fill(read_cr4, GetCR4); 763 para_fill(pv_cpu_ops.read_cr4, GetCR4);
766 para_fill(write_cr0, SetCR0); 764 para_fill(pv_cpu_ops.write_cr0, SetCR0);
767 para_fill(write_cr2, SetCR2); 765 para_fill(pv_mmu_ops.write_cr2, SetCR2);
768 para_fill(write_cr3, SetCR3); 766 para_fill(pv_mmu_ops.write_cr3, SetCR3);
769 para_fill(write_cr4, SetCR4); 767 para_fill(pv_cpu_ops.write_cr4, SetCR4);
770 para_fill(save_fl, GetInterruptMask); 768 para_fill(pv_irq_ops.save_fl, GetInterruptMask);
771 para_fill(restore_fl, SetInterruptMask); 769 para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
772 para_fill(irq_disable, DisableInterrupts); 770 para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
773 para_fill(irq_enable, EnableInterrupts); 771 para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
774 772
775 para_fill(wbinvd, WBINVD); 773 para_fill(pv_cpu_ops.wbinvd, WBINVD);
776 para_fill(read_tsc, RDTSC); 774 para_fill(pv_cpu_ops.read_tsc, RDTSC);
777 775
778 /* The following we emulate with trap and emulate for now */ 776 /* The following we emulate with trap and emulate for now */
779 /* paravirt_ops.read_msr = vmi_rdmsr */ 777 /* paravirt_ops.read_msr = vmi_rdmsr */
@@ -781,29 +779,38 @@ static inline int __init activate_vmi(void)
781 /* paravirt_ops.rdpmc = vmi_rdpmc */ 779 /* paravirt_ops.rdpmc = vmi_rdpmc */
782 780
783 /* TR interface doesn't pass TR value, wrap */ 781 /* TR interface doesn't pass TR value, wrap */
784 para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); 782 para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
785 783
786 /* LDT is special, too */ 784 /* LDT is special, too */
787 para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); 785 para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
788 786
789 para_fill(load_gdt, SetGDT); 787 para_fill(pv_cpu_ops.load_gdt, SetGDT);
790 para_fill(load_idt, SetIDT); 788 para_fill(pv_cpu_ops.load_idt, SetIDT);
791 para_fill(store_gdt, GetGDT); 789 para_fill(pv_cpu_ops.store_gdt, GetGDT);
792 para_fill(store_idt, GetIDT); 790 para_fill(pv_cpu_ops.store_idt, GetIDT);
793 para_fill(store_tr, GetTR); 791 para_fill(pv_cpu_ops.store_tr, GetTR);
794 paravirt_ops.load_tls = vmi_load_tls; 792 pv_cpu_ops.load_tls = vmi_load_tls;
795 para_fill(write_ldt_entry, WriteLDTEntry); 793 para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
796 para_fill(write_gdt_entry, WriteGDTEntry); 794 para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
797 para_fill(write_idt_entry, WriteIDTEntry); 795 para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
798 para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); 796 para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
799 para_fill(set_iopl_mask, SetIOPLMask); 797 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
800 para_fill(io_delay, IODelay); 798 para_fill(pv_cpu_ops.io_delay, IODelay);
801 para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); 799
800 para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
801 set_lazy_mode, SetLazyMode);
802 para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
803 set_lazy_mode, SetLazyMode);
804
805 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
806 set_lazy_mode, SetLazyMode);
807 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
808 set_lazy_mode, SetLazyMode);
802 809
803 /* user and kernel flush are just handled with different flags to FlushTLB */ 810 /* user and kernel flush are just handled with different flags to FlushTLB */
804 para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); 811 para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
805 para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); 812 para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
806 para_fill(flush_tlb_single, InvalPage); 813 para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
807 814
808 /* 815 /*
809 * Until a standard flag format can be agreed on, we need to 816 * Until a standard flag format can be agreed on, we need to
@@ -819,41 +826,41 @@ static inline int __init activate_vmi(void)
819#endif 826#endif
820 827
821 if (vmi_ops.set_pte) { 828 if (vmi_ops.set_pte) {
822 paravirt_ops.set_pte = vmi_set_pte; 829 pv_mmu_ops.set_pte = vmi_set_pte;
823 paravirt_ops.set_pte_at = vmi_set_pte_at; 830 pv_mmu_ops.set_pte_at = vmi_set_pte_at;
824 paravirt_ops.set_pmd = vmi_set_pmd; 831 pv_mmu_ops.set_pmd = vmi_set_pmd;
825#ifdef CONFIG_X86_PAE 832#ifdef CONFIG_X86_PAE
826 paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; 833 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
827 paravirt_ops.set_pte_present = vmi_set_pte_present; 834 pv_mmu_ops.set_pte_present = vmi_set_pte_present;
828 paravirt_ops.set_pud = vmi_set_pud; 835 pv_mmu_ops.set_pud = vmi_set_pud;
829 paravirt_ops.pte_clear = vmi_pte_clear; 836 pv_mmu_ops.pte_clear = vmi_pte_clear;
830 paravirt_ops.pmd_clear = vmi_pmd_clear; 837 pv_mmu_ops.pmd_clear = vmi_pmd_clear;
831#endif 838#endif
832 } 839 }
833 840
834 if (vmi_ops.update_pte) { 841 if (vmi_ops.update_pte) {
835 paravirt_ops.pte_update = vmi_update_pte; 842 pv_mmu_ops.pte_update = vmi_update_pte;
836 paravirt_ops.pte_update_defer = vmi_update_pte_defer; 843 pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
837 } 844 }
838 845
839 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); 846 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
840 if (vmi_ops.allocate_page) { 847 if (vmi_ops.allocate_page) {
841 paravirt_ops.alloc_pt = vmi_allocate_pt; 848 pv_mmu_ops.alloc_pt = vmi_allocate_pt;
842 paravirt_ops.alloc_pd = vmi_allocate_pd; 849 pv_mmu_ops.alloc_pd = vmi_allocate_pd;
843 paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; 850 pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
844 } 851 }
845 852
846 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); 853 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
847 if (vmi_ops.release_page) { 854 if (vmi_ops.release_page) {
848 paravirt_ops.release_pt = vmi_release_pt; 855 pv_mmu_ops.release_pt = vmi_release_pt;
849 paravirt_ops.release_pd = vmi_release_pd; 856 pv_mmu_ops.release_pd = vmi_release_pd;
850 } 857 }
851 858
852 /* Set linear is needed in all cases */ 859 /* Set linear is needed in all cases */
853 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); 860 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
854#ifdef CONFIG_HIGHPTE 861#ifdef CONFIG_HIGHPTE
855 if (vmi_ops.set_linear_mapping) 862 if (vmi_ops.set_linear_mapping)
856 paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; 863 pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
857#endif 864#endif
858 865
859 /* 866 /*
@@ -863,17 +870,17 @@ static inline int __init activate_vmi(void)
863 * the backend. They are performance critical anyway, so requiring 870 * the backend. They are performance critical anyway, so requiring
864 * a patch is not a big problem. 871 * a patch is not a big problem.
865 */ 872 */
866 paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; 873 pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
867 paravirt_ops.iret = (void *)0xbadbab0; 874 pv_cpu_ops.iret = (void *)0xbadbab0;
868 875
869#ifdef CONFIG_SMP 876#ifdef CONFIG_SMP
870 para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); 877 para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
871#endif 878#endif
872 879
873#ifdef CONFIG_X86_LOCAL_APIC 880#ifdef CONFIG_X86_LOCAL_APIC
874 para_fill(apic_read, APICRead); 881 para_fill(pv_apic_ops.apic_read, APICRead);
875 para_fill(apic_write, APICWrite); 882 para_fill(pv_apic_ops.apic_write, APICWrite);
876 para_fill(apic_write_atomic, APICWrite); 883 para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
877#endif 884#endif
878 885
879 /* 886 /*
@@ -891,15 +898,15 @@ static inline int __init activate_vmi(void)
891 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); 898 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
892 vmi_timer_ops.cancel_alarm = 899 vmi_timer_ops.cancel_alarm =
893 vmi_get_function(VMI_CALL_CancelAlarm); 900 vmi_get_function(VMI_CALL_CancelAlarm);
894 paravirt_ops.time_init = vmi_time_init; 901 pv_time_ops.time_init = vmi_time_init;
895 paravirt_ops.get_wallclock = vmi_get_wallclock; 902 pv_time_ops.get_wallclock = vmi_get_wallclock;
896 paravirt_ops.set_wallclock = vmi_set_wallclock; 903 pv_time_ops.set_wallclock = vmi_set_wallclock;
897#ifdef CONFIG_X86_LOCAL_APIC 904#ifdef CONFIG_X86_LOCAL_APIC
898 paravirt_ops.setup_boot_clock = vmi_time_bsp_init; 905 pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
899 paravirt_ops.setup_secondary_clock = vmi_time_ap_init; 906 pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
900#endif 907#endif
901 paravirt_ops.sched_clock = vmi_sched_clock; 908 pv_time_ops.sched_clock = vmi_sched_clock;
902 paravirt_ops.get_cpu_khz = vmi_cpu_khz; 909 pv_time_ops.get_cpu_khz = vmi_cpu_khz;
903 910
904 /* We have true wallclock functions; disable CMOS clock sync */ 911 /* We have true wallclock functions; disable CMOS clock sync */
905 no_sync_cmos_clock = 1; 912 no_sync_cmos_clock = 1;
@@ -908,7 +915,7 @@ static inline int __init activate_vmi(void)
908 disable_vmi_timer = 1; 915 disable_vmi_timer = 1;
909 } 916 }
910 917
911 para_fill(safe_halt, Halt); 918 para_fill(pv_irq_ops.safe_halt, Halt);
912 919
913 /* 920 /*
914 * Alternative instruction rewriting doesn't happen soon enough 921 * Alternative instruction rewriting doesn't happen soon enough
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e4e37d4f4c52..c7d19471261d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -748,24 +748,12 @@ struct kmem_cache *pmd_cache;
748 748
749void __init pgtable_cache_init(void) 749void __init pgtable_cache_init(void)
750{ 750{
751 size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); 751 if (PTRS_PER_PMD > 1)
752
753 if (PTRS_PER_PMD > 1) {
754 pmd_cache = kmem_cache_create("pmd", 752 pmd_cache = kmem_cache_create("pmd",
755 PTRS_PER_PMD*sizeof(pmd_t), 753 PTRS_PER_PMD*sizeof(pmd_t),
756 PTRS_PER_PMD*sizeof(pmd_t), 754 PTRS_PER_PMD*sizeof(pmd_t),
757 SLAB_PANIC, 755 SLAB_PANIC,
758 pmd_ctor); 756 pmd_ctor);
759 if (!SHARED_KERNEL_PMD) {
760 /* If we're in PAE mode and have a non-shared
761 kernel pmd, then the pgd size must be a
762 page size. This is because the pgd_list
763 links through the page structure, so there
764 can only be one pgd per page for this to
765 work. */
766 pgd_size = PAGE_SIZE;
767 }
768 }
769} 757}
770 758
771/* 759/*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 265f7dd3234b..94c39aaf695f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -51,11 +51,25 @@
51 51
52EXPORT_SYMBOL_GPL(hypercall_page); 52EXPORT_SYMBOL_GPL(hypercall_page);
53 53
54DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
55
56DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 54DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
57DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); 55DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
58DEFINE_PER_CPU(unsigned long, xen_cr3); 56
57/*
58 * Note about cr3 (pagetable base) values:
59 *
60 * xen_cr3 contains the current logical cr3 value; it contains the
 61 * last set cr3. This may not be the current effective cr3, because
 62 * its update may be being lazily deferred. However, a vcpu looking
 63 * at its own cr3 can use this value knowing that everything will
 64 * be self-consistent.
 65 *
 66 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 67 * hypercall to set the vcpu cr3 is complete (so it may be a little
 68 * out of date, but it will never be set early). If one vcpu is
 69 * looking at another vcpu's cr3 value, it should use this variable.
70 */
71DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
72DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
59 73
60struct start_info *xen_start_info; 74struct start_info *xen_start_info;
61EXPORT_SYMBOL_GPL(xen_start_info); 75EXPORT_SYMBOL_GPL(xen_start_info);
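
The comment block added above states the invariant the rest of this series
leans on: a vcpu reading its own cr3 may use the logical value, everyone else
must use the hypercall-confirmed one. In miniature, with the per-cpu variables
flattened to arrays and physical addresses to plain integers:

#define NCPUS 4

static unsigned long xen_cr3[NCPUS];         /* last cr3 the vcpu set  */
static unsigned long xen_current_cr3[NCPUS]; /* cr3 the hypervisor saw */

/* A vcpu inspecting itself may trust the logical value... */
static unsigned long my_cr3(int self)
{
	return xen_cr3[self];
}

/* ...but a cross-vcpu observer must use the confirmed one, since the
 * target's update may still sit in an unflushed multicall batch. */
static int vcpu_uses_pgd(int cpu, unsigned long pgd_phys)
{
	return xen_current_cr3[cpu] == pgd_phys;
}

int main(void)
{
	xen_cr3[0] = xen_current_cr3[0] = 0x3000;
	return !(my_cr3(0) == 0x3000 && vcpu_uses_pgd(0, 0x3000));
}
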
@@ -99,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu)
99 info.mfn = virt_to_mfn(vcpup); 113 info.mfn = virt_to_mfn(vcpup);
100 info.offset = offset_in_page(vcpup); 114 info.offset = offset_in_page(vcpup);
101 115
102 printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", 116 printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
103 cpu, vcpup, info.mfn, info.offset); 117 cpu, vcpup, info.mfn, info.offset);
104 118
105 /* Check to see if the hypervisor will put the vcpu_info 119 /* Check to see if the hypervisor will put the vcpu_info
@@ -123,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu)
123static void __init xen_banner(void) 137static void __init xen_banner(void)
124{ 138{
125 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 139 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
126 paravirt_ops.name); 140 pv_info.name);
127 printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); 141 printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
128} 142}
129 143
@@ -248,29 +262,10 @@ static void xen_halt(void)
248 xen_safe_halt(); 262 xen_safe_halt();
249} 263}
250 264
251static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) 265static void xen_leave_lazy(void)
252{ 266{
253 BUG_ON(preemptible()); 267 paravirt_leave_lazy(paravirt_get_lazy_mode());
254
255 switch (mode) {
256 case PARAVIRT_LAZY_NONE:
257 BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
258 break;
259
260 case PARAVIRT_LAZY_MMU:
261 case PARAVIRT_LAZY_CPU:
262 BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
263 break;
264
265 case PARAVIRT_LAZY_FLUSH:
266 /* flush if necessary, but don't change state */
267 if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
268 xen_mc_flush();
269 return;
270 }
271
272 xen_mc_flush(); 268 xen_mc_flush();
273 x86_write_percpu(xen_lazy_mode, mode);
274} 269}
275 270
276static unsigned long xen_store_tr(void) 271static unsigned long xen_store_tr(void)
@@ -357,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
357 * loaded properly. This will go away as soon as Xen has been 352 * loaded properly. This will go away as soon as Xen has been
358 * modified to not save/restore %gs for normal hypercalls. 353 * modified to not save/restore %gs for normal hypercalls.
359 */ 354 */
360 if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) 355 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
361 loadsegment(gs, 0); 356 loadsegment(gs, 0);
362} 357}
363 358
@@ -631,32 +626,36 @@ static unsigned long xen_read_cr3(void)
631 return x86_read_percpu(xen_cr3); 626 return x86_read_percpu(xen_cr3);
632} 627}
633 628
629static void set_current_cr3(void *v)
630{
631 x86_write_percpu(xen_current_cr3, (unsigned long)v);
632}
633
634static void xen_write_cr3(unsigned long cr3) 634static void xen_write_cr3(unsigned long cr3)
635{ 635{
636 struct mmuext_op *op;
637 struct multicall_space mcs;
638 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
639
636 BUG_ON(preemptible()); 640 BUG_ON(preemptible());
637 641
638 if (cr3 == x86_read_percpu(xen_cr3)) { 642 mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
639 /* just a simple tlb flush */
640 xen_flush_tlb();
641 return;
642 }
643 643
644 /* Update while interrupts are disabled, so its atomic with
645 respect to ipis */
644 x86_write_percpu(xen_cr3, cr3); 646 x86_write_percpu(xen_cr3, cr3);
645 647
648 op = mcs.args;
649 op->cmd = MMUEXT_NEW_BASEPTR;
650 op->arg1.mfn = mfn;
646 651
647 { 652 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
648 struct mmuext_op *op;
649 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
650 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
651
652 op = mcs.args;
653 op->cmd = MMUEXT_NEW_BASEPTR;
654 op->arg1.mfn = mfn;
655 653
 656 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 654 /* Update xen_current_cr3 once the batch has actually
655 been submitted. */
656 xen_mc_callback(set_current_cr3, (void *)cr3);
657 657
658 xen_mc_issue(PARAVIRT_LAZY_CPU); 658 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
659 }
660} 659}
661 660
662/* Early in boot, while setting up the initial pagetable, assume 661/* Early in boot, while setting up the initial pagetable, assume
@@ -667,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
667 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 666 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
668} 667}
669 668
669static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
670{
671 struct mmuext_op op;
672 op.cmd = level;
673 op.arg1.mfn = pfn_to_mfn(pfn);
674 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
675 BUG();
676}
677
 670/* This needs to make sure the new pte page is pinned iff it's being 678/* This needs to make sure the new pte page is pinned iff it's being
671 attached to a pinned pagetable. */ 679 attached to a pinned pagetable. */
672static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) 680static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
@@ -676,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
676 if (PagePinned(virt_to_page(mm->pgd))) { 684 if (PagePinned(virt_to_page(mm->pgd))) {
677 SetPagePinned(page); 685 SetPagePinned(page);
678 686
679 if (!PageHighMem(page)) 687 if (!PageHighMem(page)) {
680 make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); 688 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
681 else 689 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
690 } else
682 /* make sure there are no stray mappings of 691 /* make sure there are no stray mappings of
683 this page */ 692 this page */
684 kmap_flush_unused(); 693 kmap_flush_unused();
@@ -691,8 +700,10 @@ static void xen_release_pt(u32 pfn)
691 struct page *page = pfn_to_page(pfn); 700 struct page *page = pfn_to_page(pfn);
692 701
693 if (PagePinned(page)) { 702 if (PagePinned(page)) {
694 if (!PageHighMem(page)) 703 if (!PageHighMem(page)) {
704 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
695 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 705 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
706 }
696 } 707 }
697} 708}
698 709
@@ -737,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
737 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; 748 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
738 749
739 /* special set_pte for pagetable initialization */ 750 /* special set_pte for pagetable initialization */
740 paravirt_ops.set_pte = xen_set_pte_init; 751 pv_mmu_ops.set_pte = xen_set_pte_init;
741 752
742 init_mm.pgd = base; 753 init_mm.pgd = base;
743 /* 754 /*
@@ -784,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
784{ 795{
785 /* This will work as long as patching hasn't happened yet 796 /* This will work as long as patching hasn't happened yet
786 (which it hasn't) */ 797 (which it hasn't) */
787 paravirt_ops.alloc_pt = xen_alloc_pt; 798 pv_mmu_ops.alloc_pt = xen_alloc_pt;
788 paravirt_ops.set_pte = xen_set_pte; 799 pv_mmu_ops.set_pte = xen_set_pte;
789 800
790 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 801 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
791 /* 802 /*
@@ -807,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
807 /* Actually pin the pagetable down, but we can't set PG_pinned 818 /* Actually pin the pagetable down, but we can't set PG_pinned
808 yet because the page structures don't exist yet. */ 819 yet because the page structures don't exist yet. */
809 { 820 {
810 struct mmuext_op op; 821 unsigned level;
822
811#ifdef CONFIG_X86_PAE 823#ifdef CONFIG_X86_PAE
812 op.cmd = MMUEXT_PIN_L3_TABLE; 824 level = MMUEXT_PIN_L3_TABLE;
813#else 825#else
814 op.cmd = MMUEXT_PIN_L3_TABLE; 826 level = MMUEXT_PIN_L2_TABLE;
815#endif 827#endif
816 op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); 828
817 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) 829 pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
818 BUG();
819 } 830 }
820} 831}
821 832
@@ -832,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void)
832 if (have_vcpu_info_placement) { 843 if (have_vcpu_info_placement) {
833 printk(KERN_INFO "Xen: using vcpu_info placement\n"); 844 printk(KERN_INFO "Xen: using vcpu_info placement\n");
834 845
835 paravirt_ops.save_fl = xen_save_fl_direct; 846 pv_irq_ops.save_fl = xen_save_fl_direct;
836 paravirt_ops.restore_fl = xen_restore_fl_direct; 847 pv_irq_ops.restore_fl = xen_restore_fl_direct;
837 paravirt_ops.irq_disable = xen_irq_disable_direct; 848 pv_irq_ops.irq_disable = xen_irq_disable_direct;
838 paravirt_ops.irq_enable = xen_irq_enable_direct; 849 pv_irq_ops.irq_enable = xen_irq_enable_direct;
839 paravirt_ops.read_cr2 = xen_read_cr2_direct; 850 pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
840 paravirt_ops.iret = xen_iret_direct; 851 pv_cpu_ops.iret = xen_iret_direct;
841 } 852 }
842} 853}
843 854
@@ -849,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
849 860
850 start = end = reloc = NULL; 861 start = end = reloc = NULL;
851 862
852#define SITE(x) \ 863#define SITE(op, x) \
853 case PARAVIRT_PATCH(x): \ 864 case PARAVIRT_PATCH(op.x): \
854 if (have_vcpu_info_placement) { \ 865 if (have_vcpu_info_placement) { \
855 start = (char *)xen_##x##_direct; \ 866 start = (char *)xen_##x##_direct; \
856 end = xen_##x##_direct_end; \ 867 end = xen_##x##_direct_end; \
@@ -859,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
859 goto patch_site 870 goto patch_site
860 871
861 switch (type) { 872 switch (type) {
862 SITE(irq_enable); 873 SITE(pv_irq_ops, irq_enable);
863 SITE(irq_disable); 874 SITE(pv_irq_ops, irq_disable);
864 SITE(save_fl); 875 SITE(pv_irq_ops, save_fl);
865 SITE(restore_fl); 876 SITE(pv_irq_ops, restore_fl);
866#undef SITE 877#undef SITE
867 878
868 patch_site: 879 patch_site:
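
SITE() now takes the owning group separately so the case label can be spelled
op.x while the stub name stays xen_##x##_direct. The token pasting in
isolation, with the assembly stubs faked as strings:

#include <stdio.h>

static const char xen_irq_enable_direct[]  = "sti-equivalent inline stub";
static const char xen_irq_disable_direct[] = "cli-equivalent inline stub";

#define SITE(op, x) \
	printf("%-26s -> %s\n", #op "." #x, xen_##x##_direct)

int main(void)
{
	SITE(pv_irq_ops, irq_enable);
	SITE(pv_irq_ops, irq_disable);
	return 0;
}
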
@@ -894,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
894 return ret; 905 return ret;
895} 906}
896 907
897static const struct paravirt_ops xen_paravirt_ops __initdata = { 908static const struct pv_info xen_info __initdata = {
898 .paravirt_enabled = 1, 909 .paravirt_enabled = 1,
899 .shared_kernel_pmd = 0, 910 .shared_kernel_pmd = 0,
900 911
901 .name = "Xen", 912 .name = "Xen",
902 .banner = xen_banner, 913};
903 914
915static const struct pv_init_ops xen_init_ops __initdata = {
904 .patch = xen_patch, 916 .patch = xen_patch,
905 917
918 .banner = xen_banner,
906 .memory_setup = xen_memory_setup, 919 .memory_setup = xen_memory_setup,
907 .arch_setup = xen_arch_setup, 920 .arch_setup = xen_arch_setup,
908 .init_IRQ = xen_init_IRQ,
909 .post_allocator_init = xen_mark_init_mm_pinned, 921 .post_allocator_init = xen_mark_init_mm_pinned,
922};
910 923
924static const struct pv_time_ops xen_time_ops __initdata = {
911 .time_init = xen_time_init, 925 .time_init = xen_time_init,
926
912 .set_wallclock = xen_set_wallclock, 927 .set_wallclock = xen_set_wallclock,
913 .get_wallclock = xen_get_wallclock, 928 .get_wallclock = xen_get_wallclock,
914 .get_cpu_khz = xen_cpu_khz, 929 .get_cpu_khz = xen_cpu_khz,
915 .sched_clock = xen_sched_clock, 930 .sched_clock = xen_sched_clock,
931};
916 932
933static const struct pv_cpu_ops xen_cpu_ops __initdata = {
917 .cpuid = xen_cpuid, 934 .cpuid = xen_cpuid,
918 935
919 .set_debugreg = xen_set_debugreg, 936 .set_debugreg = xen_set_debugreg,
@@ -924,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
924 .read_cr0 = native_read_cr0, 941 .read_cr0 = native_read_cr0,
925 .write_cr0 = native_write_cr0, 942 .write_cr0 = native_write_cr0,
926 943
927 .read_cr2 = xen_read_cr2,
928 .write_cr2 = xen_write_cr2,
929
930 .read_cr3 = xen_read_cr3,
931 .write_cr3 = xen_write_cr3,
932
933 .read_cr4 = native_read_cr4, 944 .read_cr4 = native_read_cr4,
934 .read_cr4_safe = native_read_cr4_safe, 945 .read_cr4_safe = native_read_cr4_safe,
935 .write_cr4 = xen_write_cr4, 946 .write_cr4 = xen_write_cr4,
936 947
937 .save_fl = xen_save_fl,
938 .restore_fl = xen_restore_fl,
939 .irq_disable = xen_irq_disable,
940 .irq_enable = xen_irq_enable,
941 .safe_halt = xen_safe_halt,
942 .halt = xen_halt,
943 .wbinvd = native_wbinvd, 948 .wbinvd = native_wbinvd,
944 949
945 .read_msr = native_read_msr_safe, 950 .read_msr = native_read_msr_safe,
@@ -968,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
968 .set_iopl_mask = xen_set_iopl_mask, 973 .set_iopl_mask = xen_set_iopl_mask,
969 .io_delay = xen_io_delay, 974 .io_delay = xen_io_delay,
970 975
976 .lazy_mode = {
977 .enter = paravirt_enter_lazy_cpu,
978 .leave = xen_leave_lazy,
979 },
980};
981
982static const struct pv_irq_ops xen_irq_ops __initdata = {
983 .init_IRQ = xen_init_IRQ,
984 .save_fl = xen_save_fl,
985 .restore_fl = xen_restore_fl,
986 .irq_disable = xen_irq_disable,
987 .irq_enable = xen_irq_enable,
988 .safe_halt = xen_safe_halt,
989 .halt = xen_halt,
990};
991
992static const struct pv_apic_ops xen_apic_ops __initdata = {
971#ifdef CONFIG_X86_LOCAL_APIC 993#ifdef CONFIG_X86_LOCAL_APIC
972 .apic_write = xen_apic_write, 994 .apic_write = xen_apic_write,
973 .apic_write_atomic = xen_apic_write, 995 .apic_write_atomic = xen_apic_write,
@@ -976,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
976 .setup_secondary_clock = paravirt_nop, 998 .setup_secondary_clock = paravirt_nop,
977 .startup_ipi_hook = paravirt_nop, 999 .startup_ipi_hook = paravirt_nop,
978#endif 1000#endif
1001};
1002
1003static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1004 .pagetable_setup_start = xen_pagetable_setup_start,
1005 .pagetable_setup_done = xen_pagetable_setup_done,
1006
1007 .read_cr2 = xen_read_cr2,
1008 .write_cr2 = xen_write_cr2,
1009
1010 .read_cr3 = xen_read_cr3,
1011 .write_cr3 = xen_write_cr3,
979 1012
980 .flush_tlb_user = xen_flush_tlb, 1013 .flush_tlb_user = xen_flush_tlb,
981 .flush_tlb_kernel = xen_flush_tlb, 1014 .flush_tlb_kernel = xen_flush_tlb,
@@ -985,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
985 .pte_update = paravirt_nop, 1018 .pte_update = paravirt_nop,
986 .pte_update_defer = paravirt_nop, 1019 .pte_update_defer = paravirt_nop,
987 1020
988 .pagetable_setup_start = xen_pagetable_setup_start,
989 .pagetable_setup_done = xen_pagetable_setup_done,
990
991 .alloc_pt = xen_alloc_pt_init, 1021 .alloc_pt = xen_alloc_pt_init,
992 .release_pt = xen_release_pt, 1022 .release_pt = xen_release_pt,
993 .alloc_pd = paravirt_nop, 1023 .alloc_pd = paravirt_nop,
@@ -1023,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
1023 .dup_mmap = xen_dup_mmap, 1053 .dup_mmap = xen_dup_mmap,
1024 .exit_mmap = xen_exit_mmap, 1054 .exit_mmap = xen_exit_mmap,
1025 1055
1026 .set_lazy_mode = xen_set_lazy_mode, 1056 .lazy_mode = {
1057 .enter = paravirt_enter_lazy_mmu,
1058 .leave = xen_leave_lazy,
1059 },
1027}; 1060};
1028 1061
1029#ifdef CONFIG_SMP 1062#ifdef CONFIG_SMP
@@ -1079,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = {
1079}; 1112};
1080 1113
1081 1114
1115static void __init xen_reserve_top(void)
1116{
1117 unsigned long top = HYPERVISOR_VIRT_START;
1118 struct xen_platform_parameters pp;
1119
1120 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1121 top = pp.virt_start;
1122
1123 reserve_top_address(-top + 2 * PAGE_SIZE);
1124}
1125
1082/* First C function to be called on Xen boot */ 1126/* First C function to be called on Xen boot */
1083asmlinkage void __init xen_start_kernel(void) 1127asmlinkage void __init xen_start_kernel(void)
1084{ 1128{
@@ -1090,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void)
1090 BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); 1134 BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
1091 1135
1092 /* Install Xen paravirt ops */ 1136 /* Install Xen paravirt ops */
1093 paravirt_ops = xen_paravirt_ops; 1137 pv_info = xen_info;
1138 pv_init_ops = xen_init_ops;
1139 pv_time_ops = xen_time_ops;
1140 pv_cpu_ops = xen_cpu_ops;
1141 pv_irq_ops = xen_irq_ops;
1142 pv_apic_ops = xen_apic_ops;
1143 pv_mmu_ops = xen_mmu_ops;
1144
1094 machine_ops = xen_machine_ops; 1145 machine_ops = xen_machine_ops;
1095 1146
1096#ifdef CONFIG_SMP 1147#ifdef CONFIG_SMP
@@ -1112,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void)
1112 /* keep using Xen gdt for now; no urgent need to change it */ 1163 /* keep using Xen gdt for now; no urgent need to change it */
1113 1164
1114 x86_write_percpu(xen_cr3, __pa(pgd)); 1165 x86_write_percpu(xen_cr3, __pa(pgd));
1166 x86_write_percpu(xen_current_cr3, __pa(pgd));
1115 1167
1116#ifdef CONFIG_SMP 1168#ifdef CONFIG_SMP
1117 /* Don't do the full vcpu_info placement stuff until we have a 1169 /* Don't do the full vcpu_info placement stuff until we have a
@@ -1123,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void)
1123 xen_setup_vcpu_info_placement(); 1175 xen_setup_vcpu_info_placement();
1124#endif 1176#endif
1125 1177
1126 paravirt_ops.kernel_rpl = 1; 1178 pv_info.kernel_rpl = 1;
1127 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1179 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1128 paravirt_ops.kernel_rpl = 0; 1180 pv_info.kernel_rpl = 0;
1129 1181
1130 /* set the limit of our address space */ 1182 /* set the limit of our address space */
1131 reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); 1183 xen_reserve_top();
1132 1184
1133 /* set up basic CPUID stuff */ 1185 /* set up basic CPUID stuff */
1134 cpu_detect(&new_cpu_data); 1186 cpu_detect(&new_cpu_data);
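
xen_reserve_top() generalizes the old fixed reservation: ask the hypervisor
where its address-space hole begins and reserve everything from there up to
4GB, plus two guard pages. The 32-bit arithmetic, worked through with an
illustrative value for the hole's start:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t page_size = 4096;
	uint32_t top = 0xF5800000u; /* illustrative HYPERVISOR_VIRT_START */

	/* reserve_top_address() takes "bytes to carve off the top" */
	uint32_t reserve = (uint32_t)0 - top + 2 * page_size;

	printf("reserve %#x bytes (~%u MB below 4GB)\n",
	       reserve, reserve >> 20);
	return 0;
}
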
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0bb7f0019100..b2e32f9d0071 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -154,7 +154,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
154 pte_t *ptep, pte_t pteval) 154 pte_t *ptep, pte_t pteval)
155{ 155{
156 if (mm == current->mm || mm == &init_mm) { 156 if (mm == current->mm || mm == &init_mm) {
157 if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 157 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
158 struct multicall_space mcs; 158 struct multicall_space mcs;
159 mcs = xen_mc_entry(0); 159 mcs = xen_mc_entry(0);
160 160
@@ -303,7 +303,12 @@ pgd_t xen_make_pgd(unsigned long pgd)
303} 303}
304#endif /* CONFIG_X86_PAE */ 304#endif /* CONFIG_X86_PAE */
305 305
306 306enum pt_level {
307 PT_PGD,
308 PT_PUD,
309 PT_PMD,
310 PT_PTE
311};
307 312
308/* 313/*
309 (Yet another) pagetable walker. This one is intended for pinning a 314 (Yet another) pagetable walker. This one is intended for pinning a
@@ -315,7 +320,7 @@ pgd_t xen_make_pgd(unsigned long pgd)
315 FIXADDR_TOP. But the important bit is that we don't pin beyond 320 FIXADDR_TOP. But the important bit is that we don't pin beyond
316 there, because then we start getting into Xen's ptes. 321 there, because then we start getting into Xen's ptes.
317*/ 322*/
318static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), 323static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
319 unsigned long limit) 324 unsigned long limit)
320{ 325{
321 pgd_t *pgd = pgd_base; 326 pgd_t *pgd = pgd_base;
@@ -340,7 +345,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
340 pud = pud_offset(pgd, 0); 345 pud = pud_offset(pgd, 0);
341 346
342 if (PTRS_PER_PUD > 1) /* not folded */ 347 if (PTRS_PER_PUD > 1) /* not folded */
343 flush |= (*func)(virt_to_page(pud), 0); 348 flush |= (*func)(virt_to_page(pud), PT_PUD);
344 349
345 for (; addr != pud_limit; pud++, addr = pud_next) { 350 for (; addr != pud_limit; pud++, addr = pud_next) {
346 pmd_t *pmd; 351 pmd_t *pmd;
@@ -359,7 +364,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
359 pmd = pmd_offset(pud, 0); 364 pmd = pmd_offset(pud, 0);
360 365
361 if (PTRS_PER_PMD > 1) /* not folded */ 366 if (PTRS_PER_PMD > 1) /* not folded */
362 flush |= (*func)(virt_to_page(pmd), 0); 367 flush |= (*func)(virt_to_page(pmd), PT_PMD);
363 368
364 for (; addr != pmd_limit; pmd++) { 369 for (; addr != pmd_limit; pmd++) {
365 addr += (PAGE_SIZE * PTRS_PER_PTE); 370 addr += (PAGE_SIZE * PTRS_PER_PTE);
@@ -371,17 +376,47 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
371 if (pmd_none(*pmd)) 376 if (pmd_none(*pmd))
372 continue; 377 continue;
373 378
374 flush |= (*func)(pmd_page(*pmd), 0); 379 flush |= (*func)(pmd_page(*pmd), PT_PTE);
375 } 380 }
376 } 381 }
377 } 382 }
378 383
379 flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH); 384 flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
380 385
381 return flush; 386 return flush;
382} 387}
383 388
384static int pin_page(struct page *page, unsigned flags) 389static spinlock_t *lock_pte(struct page *page)
390{
391 spinlock_t *ptl = NULL;
392
393#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
394 ptl = __pte_lockptr(page);
395 spin_lock(ptl);
396#endif
397
398 return ptl;
399}
400
401static void do_unlock(void *v)
402{
403 spinlock_t *ptl = v;
404 spin_unlock(ptl);
405}
406
407static void xen_do_pin(unsigned level, unsigned long pfn)
408{
409 struct mmuext_op *op;
410 struct multicall_space mcs;
411
412 mcs = __xen_mc_entry(sizeof(*op));
413 op = mcs.args;
414 op->cmd = level;
415 op->arg1.mfn = pfn_to_mfn(pfn);
416 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
417}
418
419static int pin_page(struct page *page, enum pt_level level)
385{ 420{
386 unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); 421 unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
387 int flush; 422 int flush;
@@ -396,12 +431,26 @@ static int pin_page(struct page *page, unsigned flags)
396 void *pt = lowmem_page_address(page); 431 void *pt = lowmem_page_address(page);
397 unsigned long pfn = page_to_pfn(page); 432 unsigned long pfn = page_to_pfn(page);
398 struct multicall_space mcs = __xen_mc_entry(0); 433 struct multicall_space mcs = __xen_mc_entry(0);
434 spinlock_t *ptl;
399 435
400 flush = 0; 436 flush = 0;
401 437
438 ptl = NULL;
439 if (level == PT_PTE)
440 ptl = lock_pte(page);
441
402 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, 442 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
403 pfn_pte(pfn, PAGE_KERNEL_RO), 443 pfn_pte(pfn, PAGE_KERNEL_RO),
404 flags); 444 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
445
446 if (level == PT_PTE)
447 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
448
449 if (ptl) {
450 /* Queue a deferred unlock for when this batch
451 is completed. */
452 xen_mc_callback(do_unlock, ptl);
453 }
405 } 454 }
406 455
407 return flush; 456 return flush;
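
pin_page() now holds the split pte lock across the queued remap-and-pin so the
pte page cannot change between the two operations, and it cannot release the
lock until the batch has really been issued; hence the deferred-unlock
callback. The pattern reduced to its skeleton (the batch is just a callback
list here, and the names only mirror the real ones):

#include <stdio.h>

struct callback { void (*fn)(void *); void *data; };
static struct callback cbs[8];
static int ncbs;

static void xen_mc_callback(void (*fn)(void *), void *data)
{
	cbs[ncbs].fn = fn;
	cbs[ncbs].data = data;
	ncbs++;
}

static void xen_mc_flush(void)
{
	puts("hypercalls: remap RO + pin L1"); /* queued work goes first */
	for (int i = 0; i < ncbs; i++)         /* then the callbacks run */
		cbs[i].fn(cbs[i].data);
	ncbs = 0;
}

static void do_unlock(void *name) { printf("unlock %s\n", (char *)name); }

int main(void)
{
	puts("lock pte-lock");                  /* like lock_pte(page)    */
	xen_mc_callback(do_unlock, "pte-lock"); /* unlock rides the batch */
	xen_mc_flush();
	return 0;
}
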
@@ -412,8 +461,7 @@ static int pin_page(struct page *page, unsigned flags)
412 read-only, and can be pinned. */ 461 read-only, and can be pinned. */
413void xen_pgd_pin(pgd_t *pgd) 462void xen_pgd_pin(pgd_t *pgd)
414{ 463{
415 struct multicall_space mcs; 464 unsigned level;
416 struct mmuext_op *op;
417 465
418 xen_mc_batch(); 466 xen_mc_batch();
419 467
@@ -424,16 +472,13 @@ void xen_pgd_pin(pgd_t *pgd)
424 xen_mc_batch(); 472 xen_mc_batch();
425 } 473 }
426 474
427 mcs = __xen_mc_entry(sizeof(*op));
428 op = mcs.args;
429
430#ifdef CONFIG_X86_PAE 475#ifdef CONFIG_X86_PAE
431 op->cmd = MMUEXT_PIN_L3_TABLE; 476 level = MMUEXT_PIN_L3_TABLE;
432#else 477#else
433 op->cmd = MMUEXT_PIN_L2_TABLE; 478 level = MMUEXT_PIN_L2_TABLE;
434#endif 479#endif
435 op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); 480
436 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 481 xen_do_pin(level, PFN_DOWN(__pa(pgd)));
437 482
438 xen_mc_issue(0); 483 xen_mc_issue(0);
439} 484}
@@ -441,7 +486,7 @@ void xen_pgd_pin(pgd_t *pgd)
 441/* The init_mm pagetable is really pinned as soon as it's created, but 486/* The init_mm pagetable is really pinned as soon as it's created, but
442 that's before we have page structures to store the bits. So do all 487 that's before we have page structures to store the bits. So do all
443 the book-keeping now. */ 488 the book-keeping now. */
444static __init int mark_pinned(struct page *page, unsigned flags) 489static __init int mark_pinned(struct page *page, enum pt_level level)
445{ 490{
446 SetPagePinned(page); 491 SetPagePinned(page);
447 return 0; 492 return 0;
@@ -452,18 +497,32 @@ void __init xen_mark_init_mm_pinned(void)
452 pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); 497 pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
453} 498}
454 499
455static int unpin_page(struct page *page, unsigned flags) 500static int unpin_page(struct page *page, enum pt_level level)
456{ 501{
457 unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); 502 unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
458 503
459 if (pgfl && !PageHighMem(page)) { 504 if (pgfl && !PageHighMem(page)) {
460 void *pt = lowmem_page_address(page); 505 void *pt = lowmem_page_address(page);
461 unsigned long pfn = page_to_pfn(page); 506 unsigned long pfn = page_to_pfn(page);
462 struct multicall_space mcs = __xen_mc_entry(0); 507 spinlock_t *ptl = NULL;
508 struct multicall_space mcs;
509
510 if (level == PT_PTE) {
511 ptl = lock_pte(page);
512
513 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
514 }
515
516 mcs = __xen_mc_entry(0);
463 517
464 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, 518 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
465 pfn_pte(pfn, PAGE_KERNEL), 519 pfn_pte(pfn, PAGE_KERNEL),
466 flags); 520 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
521
522 if (ptl) {
523 /* unlock when batch completed */
524 xen_mc_callback(do_unlock, ptl);
525 }
467 } 526 }
468 527
469 return 0; /* never need to flush on unpin */ 528 return 0; /* never need to flush on unpin */
@@ -472,18 +531,9 @@ static int unpin_page(struct page *page, unsigned flags)
 472/* Release a pagetable's pages back as normal RW */ 531/* Release a pagetable's pages back as normal RW */
473static void xen_pgd_unpin(pgd_t *pgd) 532static void xen_pgd_unpin(pgd_t *pgd)
474{ 533{
475 struct mmuext_op *op;
476 struct multicall_space mcs;
477
478 xen_mc_batch(); 534 xen_mc_batch();
479 535
480 mcs = __xen_mc_entry(sizeof(*op)); 536 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
481
482 op = mcs.args;
483 op->cmd = MMUEXT_UNPIN_TABLE;
484 op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
485
486 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
487 537
488 pgd_walk(pgd, unpin_page, TASK_SIZE); 538 pgd_walk(pgd, unpin_page, TASK_SIZE);
489 539
@@ -514,20 +564,43 @@ static void drop_other_mm_ref(void *info)
514 564
515 if (__get_cpu_var(cpu_tlbstate).active_mm == mm) 565 if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
516 leave_mm(smp_processor_id()); 566 leave_mm(smp_processor_id());
567
568 /* If this cpu still has a stale cr3 reference, then make sure
569 it has been flushed. */
570 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
571 load_cr3(swapper_pg_dir);
572 arch_flush_lazy_cpu_mode();
573 }
517} 574}
518 575
519static void drop_mm_ref(struct mm_struct *mm) 576static void drop_mm_ref(struct mm_struct *mm)
520{ 577{
578 cpumask_t mask;
579 unsigned cpu;
580
521 if (current->active_mm == mm) { 581 if (current->active_mm == mm) {
522 if (current->mm == mm) 582 if (current->mm == mm)
523 load_cr3(swapper_pg_dir); 583 load_cr3(swapper_pg_dir);
524 else 584 else
525 leave_mm(smp_processor_id()); 585 leave_mm(smp_processor_id());
586 arch_flush_lazy_cpu_mode();
526 } 587 }
527 588
528 if (!cpus_empty(mm->cpu_vm_mask)) 589 /* Get the "official" set of cpus referring to our pagetable. */
529 xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, 590 mask = mm->cpu_vm_mask;
530 mm, 1); 591
592 /* It's possible that a vcpu may have a stale reference to our
 593 cr3, because it's in lazy mode and hasn't yet flushed
 594 its set of pending hypercalls. In this case, we can
595 look at its actual current cr3 value, and force it to flush
596 if needed. */
597 for_each_online_cpu(cpu) {
598 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
599 cpu_set(cpu, mask);
600 }
601
602 if (!cpus_empty(mask))
603 xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
531} 604}
532#else 605#else
533static void drop_mm_ref(struct mm_struct *mm) 606static void drop_mm_ref(struct mm_struct *mm)
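
The widened mask matters because cpu_vm_mask is trimmed when a vcpu leaves the
mm, yet that vcpu's hardware cr3 can still point at our pagetable while its
batched switch is pending. The check in isolation, as a bitmask model (all
names local to this sketch):

#include <stdio.h>

#define NCPUS 4

static unsigned long xen_current_cr3[NCPUS];

static unsigned target_mask(unsigned cpu_vm_mask, unsigned long pgd_phys)
{
	unsigned mask = cpu_vm_mask;

	/* also IPI any cpu whose *actual* cr3 still references the pgd */
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if (xen_current_cr3[cpu] == pgd_phys)
			mask |= 1u << cpu;
	return mask;
}

int main(void)
{
	xen_current_cr3[2] = 0x1000; /* cpu 2 lagging in lazy mode */
	printf("mask: %#x\n", target_mask(0x1, 0x1000)); /* 0x1 -> 0x5 */
	return 0;
}
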
@@ -562,5 +635,6 @@ void xen_exit_mmap(struct mm_struct *mm)
562 /* pgd may not be pinned in the error exit path of execve */ 635 /* pgd may not be pinned in the error exit path of execve */
563 if (PagePinned(virt_to_page(mm->pgd))) 636 if (PagePinned(virt_to_page(mm->pgd)))
564 xen_pgd_unpin(mm->pgd); 637 xen_pgd_unpin(mm->pgd);
638
565 spin_unlock(&mm->page_table_lock); 639 spin_unlock(&mm->page_table_lock);
566} 640}
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index c837e8e463db..5e6f36f6d876 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -26,13 +26,22 @@
26 26
27#include "multicalls.h" 27#include "multicalls.h"
28 28
29#define MC_DEBUG 1
30
29#define MC_BATCH 32 31#define MC_BATCH 32
30#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) 32#define MC_ARGS (MC_BATCH * 16 / sizeof(u64))
31 33
32struct mc_buffer { 34struct mc_buffer {
33 struct multicall_entry entries[MC_BATCH]; 35 struct multicall_entry entries[MC_BATCH];
36#if MC_DEBUG
37 struct multicall_entry debug[MC_BATCH];
38#endif
34 u64 args[MC_ARGS]; 39 u64 args[MC_ARGS];
35 unsigned mcidx, argidx; 40 struct callback {
41 void (*fn)(void *);
42 void *data;
43 } callbacks[MC_BATCH];
44 unsigned mcidx, argidx, cbidx;
36}; 45};
37 46
38static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); 47static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
@@ -43,6 +52,7 @@ void xen_mc_flush(void)
43 struct mc_buffer *b = &__get_cpu_var(mc_buffer); 52 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
44 int ret = 0; 53 int ret = 0;
45 unsigned long flags; 54 unsigned long flags;
55 int i;
46 56
47 BUG_ON(preemptible()); 57 BUG_ON(preemptible());
48 58
@@ -51,13 +61,31 @@ void xen_mc_flush(void)
51 local_irq_save(flags); 61 local_irq_save(flags);
52 62
53 if (b->mcidx) { 63 if (b->mcidx) {
54 int i; 64#if MC_DEBUG
65 memcpy(b->debug, b->entries,
66 b->mcidx * sizeof(struct multicall_entry));
67#endif
55 68
56 if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) 69 if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
57 BUG(); 70 BUG();
58 for (i = 0; i < b->mcidx; i++) 71 for (i = 0; i < b->mcidx; i++)
59 if (b->entries[i].result < 0) 72 if (b->entries[i].result < 0)
60 ret++; 73 ret++;
74
75#if MC_DEBUG
76 if (ret) {
77 printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
78 ret, smp_processor_id());
 79 for (i = 0; i < b->mcidx; i++) {
80 printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
81 i+1, b->mcidx,
82 b->debug[i].op,
83 b->debug[i].args[0],
84 b->entries[i].result);
85 }
86 }
87#endif
88
61 b->mcidx = 0; 89 b->mcidx = 0;
62 b->argidx = 0; 90 b->argidx = 0;
63 } else 91 } else
@@ -65,6 +93,13 @@ void xen_mc_flush(void)
65 93
66 local_irq_restore(flags); 94 local_irq_restore(flags);
67 95
 96 for (i = 0; i < b->cbidx; i++) {
97 struct callback *cb = &b->callbacks[i];
98
99 (*cb->fn)(cb->data);
100 }
101 b->cbidx = 0;
102
68 BUG_ON(ret); 103 BUG_ON(ret);
69} 104}
70 105
@@ -88,3 +123,16 @@ struct multicall_space __xen_mc_entry(size_t args)
88 123
89 return ret; 124 return ret;
90} 125}
126
127void xen_mc_callback(void (*fn)(void *), void *data)
128{
129 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
130 struct callback *cb;
131
132 if (b->cbidx == MC_BATCH)
133 xen_mc_flush();
134
135 cb = &b->callbacks[b->cbidx++];
136 cb->fn = fn;
137 cb->data = data;
138}
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index e6f7530b156c..8bae996d99a3 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -35,11 +35,14 @@ void xen_mc_flush(void);
35/* Issue a multicall if we're not in a lazy mode */ 35/* Issue a multicall if we're not in a lazy mode */
36static inline void xen_mc_issue(unsigned mode) 36static inline void xen_mc_issue(unsigned mode)
37{ 37{
38 if ((xen_get_lazy_mode() & mode) == 0) 38 if ((paravirt_get_lazy_mode() & mode) == 0)
39 xen_mc_flush(); 39 xen_mc_flush();
40 40
41 /* restore flags saved in xen_mc_batch */ 41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); 42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
43} 43}
44 44
45/* Set up a callback to be called when the current batch is flushed */
46void xen_mc_callback(void (*fn)(void *), void *data);
47
45#endif /* _XEN_MULTICALLS_H */ 48#endif /* _XEN_MULTICALLS_H */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 6c058585459c..c1b131bcdcbe 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -371,7 +371,8 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
371 void *info, int wait) 371 void *info, int wait)
372{ 372{
373 struct call_data_struct data; 373 struct call_data_struct data;
374 int cpus; 374 int cpus, cpu;
375 bool yield;
375 376
376 /* Holding any lock stops cpus from going down. */ 377 /* Holding any lock stops cpus from going down. */
377 spin_lock(&call_lock); 378 spin_lock(&call_lock);
@@ -400,9 +401,14 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
400 /* Send a message to other CPUs and wait for them to respond */ 401 /* Send a message to other CPUs and wait for them to respond */
401 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); 402 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
402 403
403 /* Make sure other vcpus get a chance to run. 404 /* Make sure other vcpus get a chance to run if they need to. */
404 XXX too severe? Maybe we should check the other CPU's states? */ 405 yield = false;
405 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 406 for_each_cpu_mask(cpu, mask)
407 if (xen_vcpu_stolen(cpu))
408 yield = true;
409
410 if (yield)
411 HYPERVISOR_sched_op(SCHEDOP_yield, 0);
406 412
407 /* Wait for response */ 413 /* Wait for response */
408 while (atomic_read(&data.started) != cpus || 414 while (atomic_read(&data.started) != cpus ||
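
The unconditional SCHEDOP_yield is replaced by a targeted one: yield only if
some destination vcpu is runnable but preempted, since yielding while every
target is already running just burns our own timeslice. The decision logic on
its own, with xen_vcpu_stolen() stubbed by an array:

#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static bool stolen[NCPUS]; /* stands in for xen_vcpu_stolen(cpu) */

static bool should_yield(unsigned mask)
{
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if ((mask & (1u << cpu)) && stolen[cpu])
			return true;
	return false;
}

int main(void)
{
	stolen[1] = true;
	printf("targets running:  yield=%d\n", should_yield(0x4));
	printf("target preempted: yield=%d\n", should_yield(0x2));
	return 0;
}
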
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index dfd6db69ead5..d083ff5ef088 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -105,6 +105,12 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
105 } while (get64(&state->state_entry_time) != state_time); 105 } while (get64(&state->state_entry_time) != state_time);
106} 106}
107 107
108/* return true when a vcpu could run but has no real cpu to run on */
109bool xen_vcpu_stolen(int vcpu)
110{
111 return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
112}
113
108static void setup_runstate_info(int cpu) 114static void setup_runstate_info(int cpu)
109{ 115{
110 struct vcpu_register_runstate_memory_area area; 116 struct vcpu_register_runstate_memory_area area;
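
xen_vcpu_stolen() reads the shared runstate area; "stolen" is exactly the
runnable state. For reference, the four vcpu runstates (numeric values as
published in xen/interface/vcpu.h; treat them as this sketch's assumption):

enum vcpu_runstate {
	RUNSTATE_running  = 0, /* executing on a physical cpu       */
	RUNSTATE_runnable = 1, /* ready to run but no pcpu: stolen  */
	RUNSTATE_blocked  = 2, /* voluntarily idle                  */
	RUNSTATE_offline  = 3, /* not currently runnable at all     */
};
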
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b9aaea45f07f..b02a909bfd4c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps);
11 11
12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
13DECLARE_PER_CPU(unsigned long, xen_cr3); 13DECLARE_PER_CPU(unsigned long, xen_cr3);
14DECLARE_PER_CPU(unsigned long, xen_current_cr3);
14 15
15extern struct start_info *xen_start_info; 16extern struct start_info *xen_start_info;
16extern struct shared_info *HYPERVISOR_shared_info; 17extern struct shared_info *HYPERVISOR_shared_info;
@@ -27,14 +28,9 @@ unsigned long xen_get_wallclock(void);
27int xen_set_wallclock(unsigned long time); 28int xen_set_wallclock(unsigned long time);
28unsigned long long xen_sched_clock(void); 29unsigned long long xen_sched_clock(void);
29 30
30void xen_mark_init_mm_pinned(void); 31bool xen_vcpu_stolen(int vcpu);
31
32DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
33 32
34static inline unsigned xen_get_lazy_mode(void) 33void xen_mark_init_mm_pinned(void);
35{
36 return x86_read_percpu(xen_lazy_mode);
37}
38 34
39void __init xen_fill_possible_map(void); 35void __init xen_fill_possible_map(void);
40 36
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9e3f3cc85d0d..3935469e3662 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1322,8 +1322,8 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
1322 struct scatterlist *sglist) 1322 struct scatterlist *sglist)
1323{ 1323{
1324 struct bio_vec *bvec, *bvprv; 1324 struct bio_vec *bvec, *bvprv;
1325 struct scatterlist *next_sg, *sg;
1326 struct req_iterator iter; 1325 struct req_iterator iter;
1326 struct scatterlist *sg;
1327 int nsegs, cluster; 1327 int nsegs, cluster;
1328 1328
1329 nsegs = 0; 1329 nsegs = 0;
@@ -1333,7 +1333,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
1333 * for each bio in rq 1333 * for each bio in rq
1334 */ 1334 */
1335 bvprv = NULL; 1335 bvprv = NULL;
1336 sg = next_sg = &sglist[0]; 1336 sg = NULL;
1337 rq_for_each_segment(bvec, rq, iter) { 1337 rq_for_each_segment(bvec, rq, iter) {
1338 int nbytes = bvec->bv_len; 1338 int nbytes = bvec->bv_len;
1339 1339
@@ -1349,8 +1349,10 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
1349 sg->length += nbytes; 1349 sg->length += nbytes;
1350 } else { 1350 } else {
1351new_segment: 1351new_segment:
1352 sg = next_sg; 1352 if (!sg)
1353 next_sg = sg_next(sg); 1353 sg = sglist;
1354 else
1355 sg = sg_next(sg);
1354 1356
1355 memset(sg, 0, sizeof(*sg)); 1357 memset(sg, 0, sizeof(*sg));
1356 sg->page = bvec->bv_page; 1358 sg->page = bvec->bv_page;
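
With scatterlist chaining, plain &sglist[i] arithmetic is no longer valid past
a chain boundary, so the rewritten loop keeps a cursor: NULL until the first
segment, then advanced only through sg_next(). The control flow modeled on a
flat array (the kernel's sg_next() also follows chain links):

#include <stdio.h>

struct scatterlist { unsigned length; };

static struct scatterlist *sg_next(struct scatterlist *sg)
{
	return sg + 1; /* flat-array stand-in for the chained walk */
}

int main(void)
{
	struct scatterlist sglist[4], *sg = NULL;
	int nsegs = 0;

	for (int seg = 0; seg < 3; seg++) { /* like rq_for_each_segment */
		/* the new_segment: logic from the hunk above */
		sg = sg ? sg_next(sg) : sglist;
		sg->length = 512;
		nsegs++;
	}
	printf("mapped %d segments\n", nsegs);
	return 0;
}
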
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
index 3d6bd0baa56d..efccb2155830 100644
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -115,7 +115,7 @@ static struct hv_ops lguest_cons = {
115 * (0), and the struct hv_ops containing the put_chars() function. */ 115 * (0), and the struct hv_ops containing the put_chars() function. */
116static int __init cons_init(void) 116static int __init cons_init(void)
117{ 117{
118 if (strcmp(paravirt_ops.name, "lguest") != 0) 118 if (strcmp(pv_info.name, "lguest") != 0)
119 return 0; 119 return 0;
120 120
121 return hvc_instantiate(0, 0, &lguest_cons); 121 return hvc_instantiate(0, 0, &lguest_cons);
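The detection idiom here is worth noting: guest-only drivers refuse to bind unless the paravirt backend names itself "lguest", now read from pv_info instead of the old monolithic paravirt_ops. The same check in miniature, with pv_info reduced to the one field this driver uses:

#include <stdio.h>
#include <string.h>

static struct { const char *name; } pv_info = { "lguest" };

static int cons_init(void)
{
        if (strcmp(pv_info.name, "lguest") != 0)
                return 0;               /* not our hypervisor: do nothing */
        printf("registering lguest console\n");
        return 1;
}

int main(void) { return !cons_init(); }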
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4a315f08a567..a0788c12b392 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -248,8 +248,8 @@ static void unmap_switcher(void)
248} 248}
249 249
250/*H:130 Our Guest is usually so well behaved; it never tries to do things it 250/*H:130 Our Guest is usually so well behaved; it never tries to do things it
251 * isn't allowed to. Unfortunately, "struct paravirt_ops" isn't quite 251 * isn't allowed to. Unfortunately, Linux's paravirtual infrastructure isn't
252 * complete, because it doesn't contain replacements for the Intel I/O 252 * quite complete, because it doesn't contain replacements for the Intel I/O
253 * instructions. As a result, the Guest sometimes fumbles across one during 253 * instructions. As a result, the Guest sometimes fumbles across one during
254 * the boot process as it probes for various things which are usually attached 254 * the boot process as it probes for various things which are usually attached
255 * to a PC. 255 * to a PC.
@@ -694,7 +694,7 @@ static int __init init(void)
694 694
695 /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ 695 /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */
696 if (paravirt_enabled()) { 696 if (paravirt_enabled()) {
697 printk("lguest is afraid of %s\n", paravirt_ops.name); 697 printk("lguest is afraid of %s\n", pv_info.name);
698 return -EPERM; 698 return -EPERM;
699 } 699 }
700 700
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 4a579c840301..3ba337dde857 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -23,7 +23,7 @@
23 * 23 *
24 * So how does the kernel know it's a Guest? The Guest starts at a special 24 * So how does the kernel know it's a Guest? The Guest starts at a special
25 * entry point marked with a magic string, which sets up a few things then 25 * entry point marked with a magic string, which sets up a few things then
 26 * entry point marked with a magic string, which sets up a few things then 25 * entry point marked with a magic string, which sets up a few things then
 26 * calls here. We replace the native functions in "struct paravirt_ops" 26 * calls here. We replace the native functions in various "paravirt" structures
27 * with our Guest versions, then boot like normal. :*/ 27 * with our Guest versions, then boot like normal. :*/
28 28
29/* 29/*
@@ -97,29 +97,17 @@ static cycle_t clock_base;
97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls 97 * them as a batch when lazy_mode is eventually turned off. Because hypercalls
98 * are reasonably expensive, batching them up makes sense. For example, a 98 * are reasonably expensive, batching them up makes sense. For example, a
99 * large mmap might update dozens of page table entries: that code calls 99 * large mmap might update dozens of page table entries: that code calls
100 * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls 100 * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
101 * lguest_lazy_mode(PARAVIRT_LAZY_NONE). 101 * lguest_leave_lazy_mode().
102 * 102 *
103 * So, when we're in lazy mode, we call async_hypercall() to store the call for 103 * So, when we're in lazy mode, we call async_hypercall() to store the call for
104 * future processing. When lazy mode is turned off we issue a hypercall to 104 * future processing. When lazy mode is turned off we issue a hypercall to
105 * flush the stored calls. 105 * flush the stored calls.
106 * 106 */
107 * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which 107static void lguest_leave_lazy_mode(void)
108 * indicates we're to flush any outstanding calls immediately. This is used
109 * when an interrupt handler does a kmap_atomic(): the page table changes must
110 * happen immediately even if we're in the middle of a batch. Usually we're
111 * not, though, so there's nothing to do. */
112static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
113static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
114{ 108{
115 if (mode == PARAVIRT_LAZY_FLUSH) { 109 paravirt_leave_lazy(paravirt_get_lazy_mode());
116 if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE)) 110 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
117 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
118 } else {
119 lazy_mode = mode;
120 if (mode == PARAVIRT_LAZY_NONE)
121 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
122 }
123} 111}
124 112
125static void lazy_hcall(unsigned long call, 113static void lazy_hcall(unsigned long call,
@@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call,
127 unsigned long arg2, 115 unsigned long arg2,
128 unsigned long arg3) 116 unsigned long arg3)
129{ 117{
130 if (lazy_mode == PARAVIRT_LAZY_NONE) 118 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
131 hcall(call, arg1, arg2, arg3); 119 hcall(call, arg1, arg2, arg3);
132 else 120 else
133 async_hcall(call, arg1, arg2, arg3); 121 async_hcall(call, arg1, arg2, arg3);
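Putting the comment above into runnable form: while a lazy mode is active, hypercalls are queued, and leaving lazy mode flushes the queue as one batch. This is a sketch under stated assumptions; hcall, async_hcall and the mode enum are illustrative stand-ins for the lguest primitives:

#include <stdio.h>

enum lazy_mode { LAZY_NONE, LAZY_MMU };
static enum lazy_mode mode;
static unsigned long queue[64];
static int queued;

static void hcall(unsigned long call) { printf("hypercall %lu\n", call); }
static void async_hcall(unsigned long call) { queue[queued++] = call; }

static void lazy_hcall(unsigned long call)
{
        if (mode == LAZY_NONE)
                hcall(call);            /* issue immediately */
        else
                async_hcall(call);      /* defer to the batch */
}

static void leave_lazy_mode(void)
{
        mode = LAZY_NONE;
        for (int i = 0; i < queued; i++)        /* flush the batch */
                hcall(queue[i]);
        queued = 0;
}

int main(void)
{
        mode = LAZY_MMU;
        lazy_hcall(1);
        lazy_hcall(2);                  /* both queued, nothing issued yet */
        leave_lazy_mode();              /* both issued together */
        return 0;
}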
@@ -331,7 +319,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
331} 319}
332 320
333/*G:038 That's enough excitement for now, back to ploughing through each of 321/*G:038 That's enough excitement for now, back to ploughing through each of
334 * the paravirt_ops (we're about 1/3 of the way through). 322 * the different pv_ops structures (we're about 1/3 of the way through).
335 * 323 *
336 * This is the Local Descriptor Table, another weird Intel thingy. Linux only 324 * This is the Local Descriptor Table, another weird Intel thingy. Linux only
337 * uses this for some strange applications like Wine. We don't do anything 325 * uses this for some strange applications like Wine. We don't do anything
@@ -558,7 +546,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
558 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 546 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
559} 547}
560 548
561/* Unfortunately for Lguest, the paravirt_ops for page tables were based on 549/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
562 * native page table operations. On native hardware you can set a new page 550 * native page table operations. On native hardware you can set a new page
563 * table entry whenever you want, but if you want to remove one you have to do 551 * table entry whenever you want, but if you want to remove one you have to do
564 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). 552 * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
@@ -782,7 +770,7 @@ static void lguest_time_init(void)
782 clocksource_register(&lguest_clock); 770 clocksource_register(&lguest_clock);
783 771
784 /* Now we've set up our clock, we can use it as the scheduler clock */ 772 /* Now we've set up our clock, we can use it as the scheduler clock */
785 paravirt_ops.sched_clock = lguest_sched_clock; 773 pv_time_ops.sched_clock = lguest_sched_clock;
786 774
787 /* We can't set cpumask in the initializer: damn C limitations! Set it 775 /* We can't set cpumask in the initializer: damn C limitations! Set it
788 * here and register our timer device. */ 776 * here and register our timer device. */
@@ -904,7 +892,7 @@ static __init char *lguest_memory_setup(void)
904/*G:050 892/*G:050
905 * Patching (Powerfully Placating Performance Pedants) 893 * Patching (Powerfully Placating Performance Pedants)
906 * 894 *
907 * We have already seen that "struct paravirt_ops" lets us replace simple 895 * We have already seen that pv_ops structures let us replace simple
908 * native instructions with calls to the appropriate back end all throughout 896 * native instructions with calls to the appropriate back end all throughout
909 * the kernel. This allows the same kernel to run as a Guest and as a native 897 * the kernel. This allows the same kernel to run as a Guest and as a native
910 * kernel, but it's slow because of all the indirect branches. 898 * kernel, but it's slow because of all the indirect branches.
@@ -929,10 +917,10 @@ static const struct lguest_insns
929{ 917{
930 const char *start, *end; 918 const char *start, *end;
931} lguest_insns[] = { 919} lguest_insns[] = {
932 [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli }, 920 [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
933 [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti }, 921 [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
934 [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf }, 922 [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
935 [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf }, 923 [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
936}; 924};
937 925
938/* Now our patch routine is fairly simple (based on the native one in 926/* Now our patch routine is fairly simple (based on the native one in
@@ -959,9 +947,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
959 return insn_len; 947 return insn_len;
960} 948}
961 949
962/*G:030 Once we get to lguest_init(), we know we're a Guest. The paravirt_ops 950/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
963 * structure in the kernel provides a single point for (almost) every routine 951 * structures in the kernel provide points for (almost) every routine we have
964 * we have to override to avoid privileged instructions. */ 952 * to override to avoid privileged instructions. */
965__init void lguest_init(void *boot) 953__init void lguest_init(void *boot)
966{ 954{
967 /* Copy boot parameters first: the Launcher put the physical location 955 /* Copy boot parameters first: the Launcher put the physical location
@@ -976,54 +964,70 @@ __init void lguest_init(void *boot)
976 964
977 /* We're under lguest, paravirt is enabled, and we're running at 965 /* We're under lguest, paravirt is enabled, and we're running at
978 * privilege level 1, not 0 as normal. */ 966 * privilege level 1, not 0 as normal. */
979 paravirt_ops.name = "lguest"; 967 pv_info.name = "lguest";
980 paravirt_ops.paravirt_enabled = 1; 968 pv_info.paravirt_enabled = 1;
981 paravirt_ops.kernel_rpl = 1; 969 pv_info.kernel_rpl = 1;
982 970
983 /* We set up all the lguest overrides for sensitive operations. These 971 /* We set up all the lguest overrides for sensitive operations. These
984 * are detailed with the operations themselves. */ 972 * are detailed with the operations themselves. */
985 paravirt_ops.save_fl = save_fl; 973
986 paravirt_ops.restore_fl = restore_fl; 974 /* interrupt-related operations */
987 paravirt_ops.irq_disable = irq_disable; 975 pv_irq_ops.init_IRQ = lguest_init_IRQ;
988 paravirt_ops.irq_enable = irq_enable; 976 pv_irq_ops.save_fl = save_fl;
989 paravirt_ops.load_gdt = lguest_load_gdt; 977 pv_irq_ops.restore_fl = restore_fl;
990 paravirt_ops.memory_setup = lguest_memory_setup; 978 pv_irq_ops.irq_disable = irq_disable;
991 paravirt_ops.cpuid = lguest_cpuid; 979 pv_irq_ops.irq_enable = irq_enable;
992 paravirt_ops.write_cr3 = lguest_write_cr3; 980 pv_irq_ops.safe_halt = lguest_safe_halt;
993 paravirt_ops.flush_tlb_user = lguest_flush_tlb_user; 981
994 paravirt_ops.flush_tlb_single = lguest_flush_tlb_single; 982 /* init-time operations */
995 paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; 983 pv_init_ops.memory_setup = lguest_memory_setup;
996 paravirt_ops.set_pte = lguest_set_pte; 984 pv_init_ops.patch = lguest_patch;
997 paravirt_ops.set_pte_at = lguest_set_pte_at; 985
998 paravirt_ops.set_pmd = lguest_set_pmd; 986 /* Intercepts of various cpu instructions */
987 pv_cpu_ops.load_gdt = lguest_load_gdt;
988 pv_cpu_ops.cpuid = lguest_cpuid;
989 pv_cpu_ops.load_idt = lguest_load_idt;
990 pv_cpu_ops.iret = lguest_iret;
991 pv_cpu_ops.load_esp0 = lguest_load_esp0;
992 pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
993 pv_cpu_ops.set_ldt = lguest_set_ldt;
994 pv_cpu_ops.load_tls = lguest_load_tls;
995 pv_cpu_ops.set_debugreg = lguest_set_debugreg;
996 pv_cpu_ops.clts = lguest_clts;
997 pv_cpu_ops.read_cr0 = lguest_read_cr0;
998 pv_cpu_ops.write_cr0 = lguest_write_cr0;
999 pv_cpu_ops.read_cr4 = lguest_read_cr4;
1000 pv_cpu_ops.write_cr4 = lguest_write_cr4;
1001 pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
1002 pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
1003 pv_cpu_ops.wbinvd = lguest_wbinvd;
1004 pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
1005 pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1006
1007 /* pagetable management */
1008 pv_mmu_ops.write_cr3 = lguest_write_cr3;
1009 pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
1010 pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
1011 pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
1012 pv_mmu_ops.set_pte = lguest_set_pte;
1013 pv_mmu_ops.set_pte_at = lguest_set_pte_at;
1014 pv_mmu_ops.set_pmd = lguest_set_pmd;
1015 pv_mmu_ops.read_cr2 = lguest_read_cr2;
1016 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1017 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1018 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1019
999#ifdef CONFIG_X86_LOCAL_APIC 1020#ifdef CONFIG_X86_LOCAL_APIC
1000 paravirt_ops.apic_write = lguest_apic_write; 1021 /* apic read/write intercepts */
1001 paravirt_ops.apic_write_atomic = lguest_apic_write; 1022 pv_apic_ops.apic_write = lguest_apic_write;
1002 paravirt_ops.apic_read = lguest_apic_read; 1023 pv_apic_ops.apic_write_atomic = lguest_apic_write;
1024 pv_apic_ops.apic_read = lguest_apic_read;
1003#endif 1025#endif
1004 paravirt_ops.load_idt = lguest_load_idt; 1026
1005 paravirt_ops.iret = lguest_iret; 1027 /* time operations */
1006 paravirt_ops.load_esp0 = lguest_load_esp0; 1028 pv_time_ops.get_wallclock = lguest_get_wallclock;
1007 paravirt_ops.load_tr_desc = lguest_load_tr_desc; 1029 pv_time_ops.time_init = lguest_time_init;
1008 paravirt_ops.set_ldt = lguest_set_ldt; 1030
1009 paravirt_ops.load_tls = lguest_load_tls;
1010 paravirt_ops.set_debugreg = lguest_set_debugreg;
1011 paravirt_ops.clts = lguest_clts;
1012 paravirt_ops.read_cr0 = lguest_read_cr0;
1013 paravirt_ops.write_cr0 = lguest_write_cr0;
1014 paravirt_ops.init_IRQ = lguest_init_IRQ;
1015 paravirt_ops.read_cr2 = lguest_read_cr2;
1016 paravirt_ops.read_cr3 = lguest_read_cr3;
1017 paravirt_ops.read_cr4 = lguest_read_cr4;
1018 paravirt_ops.write_cr4 = lguest_write_cr4;
1019 paravirt_ops.write_gdt_entry = lguest_write_gdt_entry;
1020 paravirt_ops.write_idt_entry = lguest_write_idt_entry;
1021 paravirt_ops.patch = lguest_patch;
1022 paravirt_ops.safe_halt = lguest_safe_halt;
1023 paravirt_ops.get_wallclock = lguest_get_wallclock;
1024 paravirt_ops.time_init = lguest_time_init;
1025 paravirt_ops.set_lazy_mode = lguest_lazy_mode;
1026 paravirt_ops.wbinvd = lguest_wbinvd;
1027 /* Now is a good time to look at the implementations of these functions 1031 /* Now is a good time to look at the implementations of these functions
1028 * before returning to the rest of lguest_init(). */ 1032 * before returning to the rest of lguest_init(). */
1029 1033
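The lguest_patch()/lguest_insns machinery in this file is table-driven: each patchable op has a number, and the table maps it to a precompiled native instruction sequence that is copied over the indirect call site when it fits. A self-contained model of that scheme; the byte sequences, sizes and op names are invented for illustration:

#include <stdio.h>
#include <string.h>

struct insns { const unsigned char *start; unsigned len; };

static const unsigned char cli_insn[] = { 0xfa };  /* x86 "cli" */
static const unsigned char sti_insn[] = { 0xfb };  /* x86 "sti" */

enum { OP_IRQ_DISABLE, OP_IRQ_ENABLE, NR_OPS };

static const struct insns patches[NR_OPS] = {
        [OP_IRQ_DISABLE] = { cli_insn, sizeof(cli_insn) },
        [OP_IRQ_ENABLE]  = { sti_insn, sizeof(sti_insn) },
};

/* Copy the replacement if we have one and it fits; return bytes written. */
static unsigned patch(unsigned type, void *ibuf, unsigned len)
{
        if (type >= NR_OPS || !patches[type].start || patches[type].len > len)
                return 0;               /* caller falls back to a real call */
        memcpy(ibuf, patches[type].start, patches[type].len);
        return patches[type].len;
}

int main(void)
{
        unsigned char site[5];          /* room for the original call insn */
        printf("patched %u byte(s)\n",
               patch(OP_IRQ_DISABLE, site, sizeof(site)));
        return 0;
}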
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
index 9e7752cc8002..57329788f8a7 100644
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -201,7 +201,7 @@ static void scan_devices(void)
201 * "struct lguest_device_desc" array. */ 201 * "struct lguest_device_desc" array. */
202static int __init lguest_bus_init(void) 202static int __init lguest_bus_init(void)
203{ 203{
204 if (strcmp(paravirt_ops.name, "lguest") != 0) 204 if (strcmp(pv_info.name, "lguest") != 0)
205 return 0; 205 return 0;
206 206
207 /* Devices are in a single page above top of "normal" mem */ 207 /* Devices are in a single page above top of "normal" mem */
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0c86be71bb33..aac8a02cbe80 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -764,6 +764,8 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
764 if (unlikely(!sgl)) 764 if (unlikely(!sgl))
765 goto enomem; 765 goto enomem;
766 766
767 memset(sgl, 0, sizeof(*sgl) * sgp->size);
768
767 /* 769 /*
768 * first loop through, set initial index and return value 770 * first loop through, set initial index and return value
769 */ 771 */
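The added memset is presumably defensive for the sg-chaining work elsewhere in this series: a freshly allocated table must not carry stale bits that later code could misread as list markers. A hedged sketch of the allocation pattern; the struct and pool size are placeholders, not the SCSI layer's types:

#include <stdlib.h>
#include <string.h>

struct sg { unsigned long flags; };

static struct sg *alloc_sgtable(size_t size)
{
        struct sg *sgl = malloc(sizeof(*sgl) * size);
        if (sgl)
                memset(sgl, 0, sizeof(*sgl) * size);    /* no stale markers */
        return sgl;
}

int main(void) { free(alloc_sgtable(8)); return 0; }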
diff --git a/include/asm-mips/mach-au1x00/au1000.h b/include/asm-mips/mach-au1x00/au1000.h
index b37baf8cf624..3bdce9126f16 100644
--- a/include/asm-mips/mach-au1x00/au1000.h
+++ b/include/asm-mips/mach-au1x00/au1000.h
@@ -40,7 +40,9 @@
40 40
41#include <linux/delay.h> 41#include <linux/delay.h>
42#include <linux/types.h> 42#include <linux/types.h>
43
43#include <asm/io.h> 44#include <asm/io.h>
45#include <asm/irq.h>
44 46
45/* cpu pipeline flush */ 47/* cpu pipeline flush */
46void static inline au_sync(void) 48void static inline au_sync(void)
@@ -523,63 +525,67 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
523/* Interrupt Numbers */ 525/* Interrupt Numbers */
524/* Au1000 */ 526/* Au1000 */
525#ifdef CONFIG_SOC_AU1000 527#ifdef CONFIG_SOC_AU1000
526#define AU1000_UART0_INT 0 528enum soc_au1000_ints {
527#define AU1000_UART1_INT 1 /* au1000 */ 529 AU1000_FIRST_INT = MIPS_CPU_IRQ_BASE,
528#define AU1000_UART2_INT 2 /* au1000 */ 530 AU1000_UART0_INT = AU1000_FIRST_INT,
529#define AU1000_UART3_INT 3 531 AU1000_UART1_INT, /* au1000 */
530#define AU1000_SSI0_INT 4 /* au1000 */ 532 AU1000_UART2_INT, /* au1000 */
531#define AU1000_SSI1_INT 5 /* au1000 */ 533 AU1000_UART3_INT,
532#define AU1000_DMA_INT_BASE 6 534 AU1000_SSI0_INT, /* au1000 */
533#define AU1000_TOY_INT 14 535 AU1000_SSI1_INT, /* au1000 */
534#define AU1000_TOY_MATCH0_INT 15 536 AU1000_DMA_INT_BASE,
535#define AU1000_TOY_MATCH1_INT 16 537
536#define AU1000_TOY_MATCH2_INT 17 538 AU1000_TOY_INT = AU1000_FIRST_INT + 14,
537#define AU1000_RTC_INT 18 539 AU1000_TOY_MATCH0_INT,
538#define AU1000_RTC_MATCH0_INT 19 540 AU1000_TOY_MATCH1_INT,
539#define AU1000_RTC_MATCH1_INT 20 541 AU1000_TOY_MATCH2_INT,
540#define AU1000_RTC_MATCH2_INT 21 542 AU1000_RTC_INT,
541#define AU1000_IRDA_TX_INT 22 /* au1000 */ 543 AU1000_RTC_MATCH0_INT,
542#define AU1000_IRDA_RX_INT 23 /* au1000 */ 544 AU1000_RTC_MATCH1_INT,
543#define AU1000_USB_DEV_REQ_INT 24 545 AU1000_RTC_MATCH2_INT,
544#define AU1000_USB_DEV_SUS_INT 25 546 AU1000_IRDA_TX_INT, /* au1000 */
545#define AU1000_USB_HOST_INT 26 547 AU1000_IRDA_RX_INT, /* au1000 */
546#define AU1000_ACSYNC_INT 27 548 AU1000_USB_DEV_REQ_INT,
547#define AU1000_MAC0_DMA_INT 28 549 AU1000_USB_DEV_SUS_INT,
548#define AU1000_MAC1_DMA_INT 29 550 AU1000_USB_HOST_INT,
549#define AU1000_I2S_UO_INT 30 /* au1000 */ 551 AU1000_ACSYNC_INT,
550#define AU1000_AC97C_INT 31 552 AU1000_MAC0_DMA_INT,
551#define AU1000_GPIO_0 32 553 AU1000_MAC1_DMA_INT,
552#define AU1000_GPIO_1 33 554 AU1000_I2S_UO_INT, /* au1000 */
553#define AU1000_GPIO_2 34 555 AU1000_AC97C_INT,
554#define AU1000_GPIO_3 35 556 AU1000_GPIO_0,
555#define AU1000_GPIO_4 36 557 AU1000_GPIO_1,
556#define AU1000_GPIO_5 37 558 AU1000_GPIO_2,
557#define AU1000_GPIO_6 38 559 AU1000_GPIO_3,
558#define AU1000_GPIO_7 39 560 AU1000_GPIO_4,
559#define AU1000_GPIO_8 40 561 AU1000_GPIO_5,
560#define AU1000_GPIO_9 41 562 AU1000_GPIO_6,
561#define AU1000_GPIO_10 42 563 AU1000_GPIO_7,
562#define AU1000_GPIO_11 43 564 AU1000_GPIO_8,
563#define AU1000_GPIO_12 44 565 AU1000_GPIO_9,
564#define AU1000_GPIO_13 45 566 AU1000_GPIO_10,
565#define AU1000_GPIO_14 46 567 AU1000_GPIO_11,
566#define AU1000_GPIO_15 47 568 AU1000_GPIO_12,
567#define AU1000_GPIO_16 48 569 AU1000_GPIO_13,
568#define AU1000_GPIO_17 49 570 AU1000_GPIO_14,
569#define AU1000_GPIO_18 50 571 AU1000_GPIO_15,
570#define AU1000_GPIO_19 51 572 AU1000_GPIO_16,
571#define AU1000_GPIO_20 52 573 AU1000_GPIO_17,
572#define AU1000_GPIO_21 53 574 AU1000_GPIO_18,
573#define AU1000_GPIO_22 54 575 AU1000_GPIO_19,
574#define AU1000_GPIO_23 55 576 AU1000_GPIO_20,
575#define AU1000_GPIO_24 56 577 AU1000_GPIO_21,
576#define AU1000_GPIO_25 57 578 AU1000_GPIO_22,
577#define AU1000_GPIO_26 58 579 AU1000_GPIO_23,
578#define AU1000_GPIO_27 59 580 AU1000_GPIO_24,
579#define AU1000_GPIO_28 60 581 AU1000_GPIO_25,
580#define AU1000_GPIO_29 61 582 AU1000_GPIO_26,
581#define AU1000_GPIO_30 62 583 AU1000_GPIO_27,
582#define AU1000_GPIO_31 63 584 AU1000_GPIO_28,
585 AU1000_GPIO_29,
586 AU1000_GPIO_30,
587 AU1000_GPIO_31,
588};
583 589
584#define UART0_ADDR 0xB1100000 590#define UART0_ADDR 0xB1100000
585#define UART1_ADDR 0xB1200000 591#define UART1_ADDR 0xB1200000
@@ -598,61 +604,65 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
598 604
599/* Au1500 */ 605/* Au1500 */
600#ifdef CONFIG_SOC_AU1500 606#ifdef CONFIG_SOC_AU1500
601#define AU1500_UART0_INT 0 607enum soc_au1500_ints {
602#define AU1000_PCI_INTA 1 /* au1500 */ 608 AU1500_FIRST_INT = MIPS_CPU_IRQ_BASE,
603#define AU1000_PCI_INTB 2 /* au1500 */ 609 AU1500_UART0_INT = AU1500_FIRST_INT,
604#define AU1500_UART3_INT 3 610 AU1000_PCI_INTA, /* au1500 */
605#define AU1000_PCI_INTC 4 /* au1500 */ 611 AU1000_PCI_INTB, /* au1500 */
606#define AU1000_PCI_INTD 5 /* au1500 */ 612 AU1500_UART3_INT,
607#define AU1000_DMA_INT_BASE 6 613 AU1000_PCI_INTC, /* au1500 */
608#define AU1000_TOY_INT 14 614 AU1000_PCI_INTD, /* au1500 */
609#define AU1000_TOY_MATCH0_INT 15 615 AU1000_DMA_INT_BASE,
610#define AU1000_TOY_MATCH1_INT 16 616
611#define AU1000_TOY_MATCH2_INT 17 617 AU1000_TOY_INT = AU1500_FIRST_INT + 14,
612#define AU1000_RTC_INT 18 618 AU1000_TOY_MATCH0_INT,
613#define AU1000_RTC_MATCH0_INT 19 619 AU1000_TOY_MATCH1_INT,
614#define AU1000_RTC_MATCH1_INT 20 620 AU1000_TOY_MATCH2_INT,
615#define AU1000_RTC_MATCH2_INT 21 621 AU1000_RTC_INT,
616#define AU1500_PCI_ERR_INT 22 622 AU1000_RTC_MATCH0_INT,
617#define AU1000_USB_DEV_REQ_INT 24 623 AU1000_RTC_MATCH1_INT,
618#define AU1000_USB_DEV_SUS_INT 25 624 AU1000_RTC_MATCH2_INT,
619#define AU1000_USB_HOST_INT 26 625 AU1500_PCI_ERR_INT,
620#define AU1000_ACSYNC_INT 27 626 AU1000_USB_DEV_REQ_INT,
621#define AU1500_MAC0_DMA_INT 28 627 AU1000_USB_DEV_SUS_INT,
622#define AU1500_MAC1_DMA_INT 29 628 AU1000_USB_HOST_INT,
623#define AU1000_AC97C_INT 31 629 AU1000_ACSYNC_INT,
624#define AU1000_GPIO_0 32 630 AU1500_MAC0_DMA_INT,
625#define AU1000_GPIO_1 33 631 AU1500_MAC1_DMA_INT,
626#define AU1000_GPIO_2 34 632 AU1000_AC97C_INT = AU1500_FIRST_INT + 31,
627#define AU1000_GPIO_3 35 633 AU1000_GPIO_0,
628#define AU1000_GPIO_4 36 634 AU1000_GPIO_1,
629#define AU1000_GPIO_5 37 635 AU1000_GPIO_2,
630#define AU1000_GPIO_6 38 636 AU1000_GPIO_3,
631#define AU1000_GPIO_7 39 637 AU1000_GPIO_4,
632#define AU1000_GPIO_8 40 638 AU1000_GPIO_5,
633#define AU1000_GPIO_9 41 639 AU1000_GPIO_6,
634#define AU1000_GPIO_10 42 640 AU1000_GPIO_7,
635#define AU1000_GPIO_11 43 641 AU1000_GPIO_8,
636#define AU1000_GPIO_12 44 642 AU1000_GPIO_9,
637#define AU1000_GPIO_13 45 643 AU1000_GPIO_10,
638#define AU1000_GPIO_14 46 644 AU1000_GPIO_11,
639#define AU1000_GPIO_15 47 645 AU1000_GPIO_12,
640#define AU1500_GPIO_200 48 646 AU1000_GPIO_13,
641#define AU1500_GPIO_201 49 647 AU1000_GPIO_14,
642#define AU1500_GPIO_202 50 648 AU1000_GPIO_15,
643#define AU1500_GPIO_203 51 649 AU1500_GPIO_200,
644#define AU1500_GPIO_20 52 650 AU1500_GPIO_201,
645#define AU1500_GPIO_204 53 651 AU1500_GPIO_202,
646#define AU1500_GPIO_205 54 652 AU1500_GPIO_203,
647#define AU1500_GPIO_23 55 653 AU1500_GPIO_20,
648#define AU1500_GPIO_24 56 654 AU1500_GPIO_204,
649#define AU1500_GPIO_25 57 655 AU1500_GPIO_205,
650#define AU1500_GPIO_26 58 656 AU1500_GPIO_23,
651#define AU1500_GPIO_27 59 657 AU1500_GPIO_24,
652#define AU1500_GPIO_28 60 658 AU1500_GPIO_25,
653#define AU1500_GPIO_206 61 659 AU1500_GPIO_26,
654#define AU1500_GPIO_207 62 660 AU1500_GPIO_27,
655#define AU1500_GPIO_208_215 63 661 AU1500_GPIO_28,
662 AU1500_GPIO_206,
663 AU1500_GPIO_207,
664 AU1500_GPIO_208_215,
665};
656 666
657/* shortcuts */ 667/* shortcuts */
658#define INTA AU1000_PCI_INTA 668#define INTA AU1000_PCI_INTA
@@ -675,63 +685,67 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
675 685
676/* Au1100 */ 686/* Au1100 */
677#ifdef CONFIG_SOC_AU1100 687#ifdef CONFIG_SOC_AU1100
678#define AU1100_UART0_INT 0 688enum soc_au1100_ints {
679#define AU1100_UART1_INT 1 689 AU1100_FIRST_INT = MIPS_CPU_IRQ_BASE,
680#define AU1100_SD_INT 2 690 AU1100_UART0_INT,
681#define AU1100_UART3_INT 3 691 AU1100_UART1_INT,
682#define AU1000_SSI0_INT 4 692 AU1100_SD_INT,
683#define AU1000_SSI1_INT 5 693 AU1100_UART3_INT,
684#define AU1000_DMA_INT_BASE 6 694 AU1000_SSI0_INT,
685#define AU1000_TOY_INT 14 695 AU1000_SSI1_INT,
686#define AU1000_TOY_MATCH0_INT 15 696 AU1000_DMA_INT_BASE,
687#define AU1000_TOY_MATCH1_INT 16 697
688#define AU1000_TOY_MATCH2_INT 17 698 AU1000_TOY_INT = AU1100_FIRST_INT + 14,
689#define AU1000_RTC_INT 18 699 AU1000_TOY_MATCH0_INT,
690#define AU1000_RTC_MATCH0_INT 19 700 AU1000_TOY_MATCH1_INT,
691#define AU1000_RTC_MATCH1_INT 20 701 AU1000_TOY_MATCH2_INT,
692#define AU1000_RTC_MATCH2_INT 21 702 AU1000_RTC_INT,
693#define AU1000_IRDA_TX_INT 22 703 AU1000_RTC_MATCH0_INT,
694#define AU1000_IRDA_RX_INT 23 704 AU1000_RTC_MATCH1_INT,
695#define AU1000_USB_DEV_REQ_INT 24 705 AU1000_RTC_MATCH2_INT,
696#define AU1000_USB_DEV_SUS_INT 25 706 AU1000_IRDA_TX_INT,
697#define AU1000_USB_HOST_INT 26 707 AU1000_IRDA_RX_INT,
698#define AU1000_ACSYNC_INT 27 708 AU1000_USB_DEV_REQ_INT,
699#define AU1100_MAC0_DMA_INT 28 709 AU1000_USB_DEV_SUS_INT,
700#define AU1100_GPIO_208_215 29 710 AU1000_USB_HOST_INT,
701#define AU1100_LCD_INT 30 711 AU1000_ACSYNC_INT,
702#define AU1000_AC97C_INT 31 712 AU1100_MAC0_DMA_INT,
703#define AU1000_GPIO_0 32 713 AU1100_GPIO_208_215,
704#define AU1000_GPIO_1 33 714 AU1100_LCD_INT,
705#define AU1000_GPIO_2 34 715 AU1000_AC97C_INT,
706#define AU1000_GPIO_3 35 716 AU1000_GPIO_0,
707#define AU1000_GPIO_4 36 717 AU1000_GPIO_1,
708#define AU1000_GPIO_5 37 718 AU1000_GPIO_2,
709#define AU1000_GPIO_6 38 719 AU1000_GPIO_3,
710#define AU1000_GPIO_7 39 720 AU1000_GPIO_4,
711#define AU1000_GPIO_8 40 721 AU1000_GPIO_5,
712#define AU1000_GPIO_9 41 722 AU1000_GPIO_6,
713#define AU1000_GPIO_10 42 723 AU1000_GPIO_7,
714#define AU1000_GPIO_11 43 724 AU1000_GPIO_8,
715#define AU1000_GPIO_12 44 725 AU1000_GPIO_9,
716#define AU1000_GPIO_13 45 726 AU1000_GPIO_10,
717#define AU1000_GPIO_14 46 727 AU1000_GPIO_11,
718#define AU1000_GPIO_15 47 728 AU1000_GPIO_12,
719#define AU1000_GPIO_16 48 729 AU1000_GPIO_13,
720#define AU1000_GPIO_17 49 730 AU1000_GPIO_14,
721#define AU1000_GPIO_18 50 731 AU1000_GPIO_15,
722#define AU1000_GPIO_19 51 732 AU1000_GPIO_16,
723#define AU1000_GPIO_20 52 733 AU1000_GPIO_17,
724#define AU1000_GPIO_21 53 734 AU1000_GPIO_18,
725#define AU1000_GPIO_22 54 735 AU1000_GPIO_19,
726#define AU1000_GPIO_23 55 736 AU1000_GPIO_20,
727#define AU1000_GPIO_24 56 737 AU1000_GPIO_21,
728#define AU1000_GPIO_25 57 738 AU1000_GPIO_22,
729#define AU1000_GPIO_26 58 739 AU1000_GPIO_23,
730#define AU1000_GPIO_27 59 740 AU1000_GPIO_24,
731#define AU1000_GPIO_28 60 741 AU1000_GPIO_25,
732#define AU1000_GPIO_29 61 742 AU1000_GPIO_26,
733#define AU1000_GPIO_30 62 743 AU1000_GPIO_27,
734#define AU1000_GPIO_31 63 744 AU1000_GPIO_28,
745 AU1000_GPIO_29,
746 AU1000_GPIO_30,
747 AU1000_GPIO_31,
748};
735 749
736#define UART0_ADDR 0xB1100000 750#define UART0_ADDR 0xB1100000
737#define UART1_ADDR 0xB1200000 751#define UART1_ADDR 0xB1200000
@@ -746,69 +760,73 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
746#endif /* CONFIG_SOC_AU1100 */ 760#endif /* CONFIG_SOC_AU1100 */
747 761
748#ifdef CONFIG_SOC_AU1550 762#ifdef CONFIG_SOC_AU1550
749#define AU1550_UART0_INT 0 763enum soc_au1550_ints {
750#define AU1550_PCI_INTA 1 764 AU1550_FIRST_INT = MIPS_CPU_IRQ_BASE,
751#define AU1550_PCI_INTB 2 765 AU1550_UART0_INT = AU1550_FIRST_INT,
752#define AU1550_DDMA_INT 3 766 AU1550_PCI_INTA,
753#define AU1550_CRYPTO_INT 4 767 AU1550_PCI_INTB,
754#define AU1550_PCI_INTC 5 768 AU1550_DDMA_INT,
755#define AU1550_PCI_INTD 6 769 AU1550_CRYPTO_INT,
756#define AU1550_PCI_RST_INT 7 770 AU1550_PCI_INTC,
757#define AU1550_UART1_INT 8 771 AU1550_PCI_INTD,
758#define AU1550_UART3_INT 9 772 AU1550_PCI_RST_INT,
759#define AU1550_PSC0_INT 10 773 AU1550_UART1_INT,
760#define AU1550_PSC1_INT 11 774 AU1550_UART3_INT,
761#define AU1550_PSC2_INT 12 775 AU1550_PSC0_INT,
762#define AU1550_PSC3_INT 13 776 AU1550_PSC1_INT,
763#define AU1000_TOY_INT 14 777 AU1550_PSC2_INT,
764#define AU1000_TOY_MATCH0_INT 15 778 AU1550_PSC3_INT,
765#define AU1000_TOY_MATCH1_INT 16 779 AU1000_TOY_INT,
766#define AU1000_TOY_MATCH2_INT 17 780 AU1000_TOY_MATCH0_INT,
767#define AU1000_RTC_INT 18 781 AU1000_TOY_MATCH1_INT,
768#define AU1000_RTC_MATCH0_INT 19 782 AU1000_TOY_MATCH2_INT,
769#define AU1000_RTC_MATCH1_INT 20 783 AU1000_RTC_INT,
770#define AU1000_RTC_MATCH2_INT 21 784 AU1000_RTC_MATCH0_INT,
771#define AU1550_NAND_INT 23 785 AU1000_RTC_MATCH1_INT,
772#define AU1550_USB_DEV_REQ_INT 24 786 AU1000_RTC_MATCH2_INT,
773#define AU1550_USB_DEV_SUS_INT 25 787
774#define AU1550_USB_HOST_INT 26 788 AU1550_NAND_INT = AU1550_FIRST_INT + 23,
775#define AU1000_USB_DEV_REQ_INT AU1550_USB_DEV_REQ_INT 789 AU1550_USB_DEV_REQ_INT,
776#define AU1000_USB_DEV_SUS_INT AU1550_USB_DEV_SUS_INT 790 AU1000_USB_DEV_REQ_INT = AU1550_USB_DEV_REQ_INT,
777#define AU1000_USB_HOST_INT AU1550_USB_HOST_INT 791 AU1550_USB_DEV_SUS_INT,
778#define AU1550_MAC0_DMA_INT 27 792 AU1000_USB_DEV_SUS_INT = AU1550_USB_DEV_SUS_INT,
779#define AU1550_MAC1_DMA_INT 28 793 AU1550_USB_HOST_INT,
780#define AU1000_GPIO_0 32 794 AU1000_USB_HOST_INT = AU1550_USB_HOST_INT,
781#define AU1000_GPIO_1 33 795 AU1550_MAC0_DMA_INT,
782#define AU1000_GPIO_2 34 796 AU1550_MAC1_DMA_INT,
783#define AU1000_GPIO_3 35 797 AU1000_GPIO_0 = AU1550_FIRST_INT + 32,
784#define AU1000_GPIO_4 36 798 AU1000_GPIO_1,
785#define AU1000_GPIO_5 37 799 AU1000_GPIO_2,
786#define AU1000_GPIO_6 38 800 AU1000_GPIO_3,
787#define AU1000_GPIO_7 39 801 AU1000_GPIO_4,
788#define AU1000_GPIO_8 40 802 AU1000_GPIO_5,
789#define AU1000_GPIO_9 41 803 AU1000_GPIO_6,
790#define AU1000_GPIO_10 42 804 AU1000_GPIO_7,
791#define AU1000_GPIO_11 43 805 AU1000_GPIO_8,
792#define AU1000_GPIO_12 44 806 AU1000_GPIO_9,
793#define AU1000_GPIO_13 45 807 AU1000_GPIO_10,
794#define AU1000_GPIO_14 46 808 AU1000_GPIO_11,
795#define AU1000_GPIO_15 47 809 AU1000_GPIO_12,
796#define AU1550_GPIO_200 48 810 AU1000_GPIO_13,
797#define AU1500_GPIO_201_205 49 // Logical or of GPIO201:205 811 AU1000_GPIO_14,
798#define AU1500_GPIO_16 50 812 AU1000_GPIO_15,
799#define AU1500_GPIO_17 51 813 AU1550_GPIO_200,
800#define AU1500_GPIO_20 52 814 AU1500_GPIO_201_205, /* Logical or of GPIO201:205 */
801#define AU1500_GPIO_21 53 815 AU1500_GPIO_16,
802#define AU1500_GPIO_22 54 816 AU1500_GPIO_17,
803#define AU1500_GPIO_23 55 817 AU1500_GPIO_20,
804#define AU1500_GPIO_24 56 818 AU1500_GPIO_21,
805#define AU1500_GPIO_25 57 819 AU1500_GPIO_22,
806#define AU1500_GPIO_26 58 820 AU1500_GPIO_23,
807#define AU1500_GPIO_27 59 821 AU1500_GPIO_24,
808#define AU1500_GPIO_28 60 822 AU1500_GPIO_25,
809#define AU1500_GPIO_206 61 823 AU1500_GPIO_26,
810#define AU1500_GPIO_207 62 824 AU1500_GPIO_27,
811#define AU1500_GPIO_208_218 63 // Logical or of GPIO208:218 825 AU1500_GPIO_28,
826 AU1500_GPIO_206,
827 AU1500_GPIO_207,
828 AU1500_GPIO_208_218, /* Logical or of GPIO208:218 */
829};
812 830
813/* shortcuts */ 831/* shortcuts */
814#define INTA AU1550_PCI_INTA 832#define INTA AU1550_PCI_INTA
@@ -832,70 +850,74 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
832#endif /* CONFIG_SOC_AU1550 */ 850#endif /* CONFIG_SOC_AU1550 */
833 851
834#ifdef CONFIG_SOC_AU1200 852#ifdef CONFIG_SOC_AU1200
835#define AU1200_UART0_INT 0 853enum soc_au1200_ints {
836#define AU1200_SWT_INT 1 854 AU1200_FIRST_INT = MIPS_CPU_IRQ_BASE,
837#define AU1200_SD_INT 2 855 AU1200_UART0_INT = AU1200_FIRST_INT,
838#define AU1200_DDMA_INT 3 856 AU1200_SWT_INT,
839#define AU1200_MAE_BE_INT 4 857 AU1200_SD_INT,
840#define AU1200_GPIO_200 5 858 AU1200_DDMA_INT,
841#define AU1200_GPIO_201 6 859 AU1200_MAE_BE_INT,
842#define AU1200_GPIO_202 7 860 AU1200_GPIO_200,
843#define AU1200_UART1_INT 8 861 AU1200_GPIO_201,
844#define AU1200_MAE_FE_INT 9 862 AU1200_GPIO_202,
845#define AU1200_PSC0_INT 10 863 AU1200_UART1_INT,
846#define AU1200_PSC1_INT 11 864 AU1200_MAE_FE_INT,
847#define AU1200_AES_INT 12 865 AU1200_PSC0_INT,
848#define AU1200_CAMERA_INT 13 866 AU1200_PSC1_INT,
849#define AU1000_TOY_INT 14 867 AU1200_AES_INT,
850#define AU1000_TOY_MATCH0_INT 15 868 AU1200_CAMERA_INT,
851#define AU1000_TOY_MATCH1_INT 16 869 AU1000_TOY_INT,
852#define AU1000_TOY_MATCH2_INT 17 870 AU1000_TOY_MATCH0_INT,
853#define AU1000_RTC_INT 18 871 AU1000_TOY_MATCH1_INT,
854#define AU1000_RTC_MATCH0_INT 19 872 AU1000_TOY_MATCH2_INT,
855#define AU1000_RTC_MATCH1_INT 20 873 AU1000_RTC_INT,
856#define AU1000_RTC_MATCH2_INT 21 874 AU1000_RTC_MATCH0_INT,
857#define AU1200_NAND_INT 23 875 AU1000_RTC_MATCH1_INT,
858#define AU1200_GPIO_204 24 876 AU1000_RTC_MATCH2_INT,
859#define AU1200_GPIO_205 25 877
860#define AU1200_GPIO_206 26 878 AU1200_NAND_INT = AU1200_FIRST_INT + 23,
861#define AU1200_GPIO_207 27 879 AU1200_GPIO_204,
862#define AU1200_GPIO_208_215 28 // Logical OR of 208:215 880 AU1200_GPIO_205,
863#define AU1200_USB_INT 29 881 AU1200_GPIO_206,
864#define AU1000_USB_HOST_INT AU1200_USB_INT 882 AU1200_GPIO_207,
865#define AU1200_LCD_INT 30 883 AU1200_GPIO_208_215, /* Logical OR of 208:215 */
866#define AU1200_MAE_BOTH_INT 31 884 AU1200_USB_INT,
867#define AU1000_GPIO_0 32 885 AU1000_USB_HOST_INT = AU1200_USB_INT,
868#define AU1000_GPIO_1 33 886 AU1200_LCD_INT,
869#define AU1000_GPIO_2 34 887 AU1200_MAE_BOTH_INT,
870#define AU1000_GPIO_3 35 888 AU1000_GPIO_0,
871#define AU1000_GPIO_4 36 889 AU1000_GPIO_1,
872#define AU1000_GPIO_5 37 890 AU1000_GPIO_2,
873#define AU1000_GPIO_6 38 891 AU1000_GPIO_3,
874#define AU1000_GPIO_7 39 892 AU1000_GPIO_4,
875#define AU1000_GPIO_8 40 893 AU1000_GPIO_5,
876#define AU1000_GPIO_9 41 894 AU1000_GPIO_6,
877#define AU1000_GPIO_10 42 895 AU1000_GPIO_7,
878#define AU1000_GPIO_11 43 896 AU1000_GPIO_8,
879#define AU1000_GPIO_12 44 897 AU1000_GPIO_9,
880#define AU1000_GPIO_13 45 898 AU1000_GPIO_10,
881#define AU1000_GPIO_14 46 899 AU1000_GPIO_11,
882#define AU1000_GPIO_15 47 900 AU1000_GPIO_12,
883#define AU1000_GPIO_16 48 901 AU1000_GPIO_13,
884#define AU1000_GPIO_17 49 902 AU1000_GPIO_14,
885#define AU1000_GPIO_18 50 903 AU1000_GPIO_15,
886#define AU1000_GPIO_19 51 904 AU1000_GPIO_16,
887#define AU1000_GPIO_20 52 905 AU1000_GPIO_17,
888#define AU1000_GPIO_21 53 906 AU1000_GPIO_18,
889#define AU1000_GPIO_22 54 907 AU1000_GPIO_19,
890#define AU1000_GPIO_23 55 908 AU1000_GPIO_20,
891#define AU1000_GPIO_24 56 909 AU1000_GPIO_21,
892#define AU1000_GPIO_25 57 910 AU1000_GPIO_22,
893#define AU1000_GPIO_26 58 911 AU1000_GPIO_23,
894#define AU1000_GPIO_27 59 912 AU1000_GPIO_24,
895#define AU1000_GPIO_28 60 913 AU1000_GPIO_25,
896#define AU1000_GPIO_29 61 914 AU1000_GPIO_26,
897#define AU1000_GPIO_30 62 915 AU1000_GPIO_27,
898#define AU1000_GPIO_31 63 916 AU1000_GPIO_28,
917 AU1000_GPIO_29,
918 AU1000_GPIO_30,
919 AU1000_GPIO_31,
920};
899 921
900#define UART0_ADDR 0xB1100000 922#define UART0_ADDR 0xB1100000
901#define UART1_ADDR 0xB1200000 923#define UART1_ADDR 0xB1200000
@@ -926,10 +948,12 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
926 948
927#endif /* CONFIG_SOC_AU1200 */ 949#endif /* CONFIG_SOC_AU1200 */
928 950
929#define AU1000_LAST_INTC0_INT 31 951#define AU1000_INTC0_INT_BASE (MIPS_CPU_IRQ_BASE + 0)
930#define AU1000_LAST_INTC1_INT 63 952#define AU1000_INTC0_INT_LAST (MIPS_CPU_IRQ_BASE + 31)
931#define AU1000_MAX_INTR 63 953#define AU1000_INTC1_INT_BASE (MIPS_CPU_IRQ_BASE + 32)
932#define INTX 0xFF /* not valid */ 954#define AU1000_INTC1_INT_LAST (MIPS_CPU_IRQ_BASE + 63)
955#define AU1000_MAX_INTR (MIPS_CPU_IRQ_BASE + 63)
956#define INTX 0xFF /* not valid */
933 957
934/* Programmable Counters 0 and 1 */ 958/* Programmable Counters 0 and 1 */
935#define SYS_BASE 0xB1900000 959#define SYS_BASE 0xB1900000
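The header conversion above replaces absolute #define interrupt numbers with enum members offset from MIPS_CPU_IRQ_BASE, re-anchoring explicitly (= FIRST_INT + n) wherever the old numbering had gaps. The pattern in isolation, with a dummy base value and most sources elided:

#include <stdio.h>

#define MIPS_CPU_IRQ_BASE 16            /* illustrative base, not the real one */

enum soc_ints {
        FIRST_INT = MIPS_CPU_IRQ_BASE,
        UART0_INT = FIRST_INT,
        UART1_INT,
        /* ... sources 2..13 elided ... */
        TOY_INT = FIRST_INT + 14,       /* re-anchor across the gap */
        TOY_MATCH0_INT,
};

int main(void)
{
        printf("UART1=%d TOY=%d\n", UART1_INT, TOY_INT);  /* 17, 30 */
        return 0;
}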
diff --git a/include/asm-mips/mach-db1x00/db1200.h b/include/asm-mips/mach-db1x00/db1200.h
index 647fdb54cc1d..050eae87ff01 100644
--- a/include/asm-mips/mach-db1x00/db1200.h
+++ b/include/asm-mips/mach-db1x00/db1200.h
@@ -181,29 +181,34 @@ static BCSR * const bcsr = (BCSR *)BCSR_KSEG1_ADDR;
181#define NAND_PHYS_ADDR 0x20000000 181#define NAND_PHYS_ADDR 0x20000000
182 182
183/* 183/*
184 * External Interrupts for Pb1200 as of 8/6/2004. 184 * External Interrupts for Pb1200 as of 8/6/2004.
185 * Bit positions in the CPLD registers can be calculated by taking 185 * Bit positions in the CPLD registers can be calculated by taking
186 * the interrupt define and subtracting the DB1200_INT_BEGIN value. 186 * the interrupt define and subtracting the DB1200_INT_BEGIN value.
187 * *example: IDE bis pos is = 64 - 64 187 *
 188 ETH bit pos is = 65 - 64 188 * Example: IDE bit pos is 64 - 64
 189 * ETH bit pos is 65 - 64
189 */ 190 */
190#define DB1200_INT_BEGIN (AU1000_LAST_INTC1_INT + 1) 191enum external_pb1200_ints {
191#define DB1200_IDE_INT (DB1200_INT_BEGIN + 0) 192 DB1200_INT_BEGIN = AU1000_MAX_INTR + 1,
192#define DB1200_ETH_INT (DB1200_INT_BEGIN + 1) 193
193#define DB1200_PC0_INT (DB1200_INT_BEGIN + 2) 194 DB1200_IDE_INT = DB1200_INT_BEGIN,
194#define DB1200_PC0_STSCHG_INT (DB1200_INT_BEGIN + 3) 195 DB1200_ETH_INT,
195#define DB1200_PC1_INT (DB1200_INT_BEGIN + 4) 196 DB1200_PC0_INT,
196#define DB1200_PC1_STSCHG_INT (DB1200_INT_BEGIN + 5) 197 DB1200_PC0_STSCHG_INT,
197#define DB1200_DC_INT (DB1200_INT_BEGIN + 6) 198 DB1200_PC1_INT,
198#define DB1200_FLASHBUSY_INT (DB1200_INT_BEGIN + 7) 199 DB1200_PC1_STSCHG_INT,
199#define DB1200_PC0_INSERT_INT (DB1200_INT_BEGIN + 8) 200 DB1200_DC_INT,
200#define DB1200_PC0_EJECT_INT (DB1200_INT_BEGIN + 9) 201 DB1200_FLASHBUSY_INT,
201#define DB1200_PC1_INSERT_INT (DB1200_INT_BEGIN + 10) 202 DB1200_PC0_INSERT_INT,
202#define DB1200_PC1_EJECT_INT (DB1200_INT_BEGIN + 11) 203 DB1200_PC0_EJECT_INT,
203#define DB1200_SD0_INSERT_INT (DB1200_INT_BEGIN + 12) 204 DB1200_PC1_INSERT_INT,
204#define DB1200_SD0_EJECT_INT (DB1200_INT_BEGIN + 13) 205 DB1200_PC1_EJECT_INT,
205 206 DB1200_SD0_INSERT_INT,
206#define DB1200_INT_END (DB1200_INT_BEGIN + 15) 207 DB1200_SD0_EJECT_INT,
208
209 DB1200_INT_END = DB1200_INT_BEGIN + 15,
210};
211
207 212
208/* For drivers/pcmcia/au1000_db1x00.c */ 213/* For drivers/pcmcia/au1000_db1x00.c */
209 214
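The comment's rule, made executable: a board interrupt's bit position in the CPLD registers is its interrupt number minus DB1200_INT_BEGIN. The values below are dummies chosen for the sketch (the real base depends on MIPS_CPU_IRQ_BASE):

#include <stdio.h>

#define AU1000_MAX_INTR   79                    /* dummy for the sketch */
#define DB1200_INT_BEGIN  (AU1000_MAX_INTR + 1)
#define DB1200_IDE_INT    (DB1200_INT_BEGIN + 0)
#define DB1200_ETH_INT    (DB1200_INT_BEGIN + 1)

static unsigned cpld_bit(unsigned irq) { return irq - DB1200_INT_BEGIN; }

int main(void)
{
        printf("IDE bit %u, ETH bit %u\n",
               cpld_bit(DB1200_IDE_INT), cpld_bit(DB1200_ETH_INT));  /* 0, 1 */
        return 0;
}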
diff --git a/include/asm-mips/mach-pb1x00/pb1200.h b/include/asm-mips/mach-pb1x00/pb1200.h
index 409d443322c1..d9f384acfea9 100644
--- a/include/asm-mips/mach-pb1x00/pb1200.h
+++ b/include/asm-mips/mach-pb1x00/pb1200.h
@@ -217,31 +217,35 @@ static BCSR * const bcsr = (BCSR *)BCSR_KSEG1_ADDR;
217 217
218 218
219/* 219/*
220 * External Interrupts for Pb1200 as of 8/6/2004. 220 * External Interrupts for Pb1200 as of 8/6/2004.
221 * Bit positions in the CPLD registers can be calculated by taking 221 * Bit positions in the CPLD registers can be calculated by taking
222 * the interrupt define and subtracting the PB1200_INT_BEGIN value. 222 * the interrupt define and subtracting the PB1200_INT_BEGIN value.
223 * *example: IDE bis pos is = 64 - 64 223 *
 224 ETH bit pos is = 65 - 64 224 * Example: IDE bit pos is 64 - 64
 225 * ETH bit pos is 65 - 64
225 */ 226 */
226#define PB1200_INT_BEGIN (AU1000_LAST_INTC1_INT + 1) 227enum external_pb1200_ints {
227#define PB1200_IDE_INT (PB1200_INT_BEGIN + 0) 228 PB1200_INT_BEGIN = AU1000_MAX_INTR + 1,
228#define PB1200_ETH_INT (PB1200_INT_BEGIN + 1) 229
229#define PB1200_PC0_INT (PB1200_INT_BEGIN + 2) 230 PB1200_IDE_INT = PB1200_INT_BEGIN,
230#define PB1200_PC0_STSCHG_INT (PB1200_INT_BEGIN + 3) 231 PB1200_ETH_INT,
231#define PB1200_PC1_INT (PB1200_INT_BEGIN + 4) 232 PB1200_PC0_INT,
232#define PB1200_PC1_STSCHG_INT (PB1200_INT_BEGIN + 5) 233 PB1200_PC0_STSCHG_INT,
233#define PB1200_DC_INT (PB1200_INT_BEGIN + 6) 234 PB1200_PC1_INT,
234#define PB1200_FLASHBUSY_INT (PB1200_INT_BEGIN + 7) 235 PB1200_PC1_STSCHG_INT,
235#define PB1200_PC0_INSERT_INT (PB1200_INT_BEGIN + 8) 236 PB1200_DC_INT,
236#define PB1200_PC0_EJECT_INT (PB1200_INT_BEGIN + 9) 237 PB1200_FLASHBUSY_INT,
237#define PB1200_PC1_INSERT_INT (PB1200_INT_BEGIN + 10) 238 PB1200_PC0_INSERT_INT,
238#define PB1200_PC1_EJECT_INT (PB1200_INT_BEGIN + 11) 239 PB1200_PC0_EJECT_INT,
239#define PB1200_SD0_INSERT_INT (PB1200_INT_BEGIN + 12) 240 PB1200_PC1_INSERT_INT,
240#define PB1200_SD0_EJECT_INT (PB1200_INT_BEGIN + 13) 241 PB1200_PC1_EJECT_INT,
241#define PB1200_SD1_INSERT_INT (PB1200_INT_BEGIN + 14) 242 PB1200_SD0_INSERT_INT,
242#define PB1200_SD1_EJECT_INT (PB1200_INT_BEGIN + 15) 243 PB1200_SD0_EJECT_INT,
243 244 PB1200_SD1_INSERT_INT,
244#define PB1200_INT_END (PB1200_INT_BEGIN + 15) 245 PB1200_SD1_EJECT_INT,
246
247 PB1200_INT_END (PB1200_INT_BEGIN + 15)
248};
245 249
246/* For drivers/pcmcia/au1000_db1x00.c */ 250/* For drivers/pcmcia/au1000_db1x00.c */
247#define BOARD_PC0_INT PB1200_PC0_INT 251#define BOARD_PC0_INT PB1200_PC0_INT
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 9fa3fa9e62d1..f59d370c5df4 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -25,27 +25,22 @@ struct tss_struct;
25struct mm_struct; 25struct mm_struct;
26struct desc_struct; 26struct desc_struct;
27 27
28/* Lazy mode for batching updates / context switch */ 28/* general info */
29enum paravirt_lazy_mode { 29struct pv_info {
30 PARAVIRT_LAZY_NONE = 0,
31 PARAVIRT_LAZY_MMU = 1,
32 PARAVIRT_LAZY_CPU = 2,
33 PARAVIRT_LAZY_FLUSH = 3,
34};
35
36struct paravirt_ops
37{
38 unsigned int kernel_rpl; 30 unsigned int kernel_rpl;
39 int shared_kernel_pmd; 31 int shared_kernel_pmd;
40 int paravirt_enabled; 32 int paravirt_enabled;
41 const char *name; 33 const char *name;
34};
42 35
36struct pv_init_ops {
43 /* 37 /*
44 * Patch may replace one of the defined code sequences with arbitrary 38 * Patch may replace one of the defined code sequences with
45 * code, subject to the same register constraints. This generally 39 * arbitrary code, subject to the same register constraints.
46 * means the code is not free to clobber any registers other than EAX. 40 * This generally means the code is not free to clobber any
47 * The patch function should return the number of bytes of code 41 * registers other than EAX. The patch function should return
48 * generated, as we nop pad the rest in generic code. 42 * the number of bytes of code generated, as we nop pad the
43 * rest in generic code.
49 */ 44 */
50 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, 45 unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
51 unsigned long addr, unsigned len); 46 unsigned long addr, unsigned len);
@@ -55,29 +50,29 @@ struct paravirt_ops
55 char *(*memory_setup)(void); 50 char *(*memory_setup)(void);
56 void (*post_allocator_init)(void); 51 void (*post_allocator_init)(void);
57 52
58 void (*init_IRQ)(void);
59 void (*time_init)(void);
60
61 /*
62 * Called before/after init_mm pagetable setup. setup_start
63 * may reset %cr3, and may pre-install parts of the pagetable;
64 * pagetable setup is expected to preserve any existing
65 * mapping.
66 */
67 void (*pagetable_setup_start)(pgd_t *pgd_base);
68 void (*pagetable_setup_done)(pgd_t *pgd_base);
69
70 /* Print a banner to identify the environment */ 53 /* Print a banner to identify the environment */
71 void (*banner)(void); 54 void (*banner)(void);
55};
56
57
58struct pv_lazy_ops {
59 /* Set deferred update mode, used for batching operations. */
60 void (*enter)(void);
61 void (*leave)(void);
62};
63
64struct pv_time_ops {
65 void (*time_init)(void);
72 66
 73 /* Get and set time of day */ 67 /* Get and set time of day */
74 unsigned long (*get_wallclock)(void); 68 unsigned long (*get_wallclock)(void);
75 int (*set_wallclock)(unsigned long); 69 int (*set_wallclock)(unsigned long);
76 70
77 /* cpuid emulation, mostly so that caps bits can be disabled */ 71 unsigned long long (*sched_clock)(void);
78 void (*cpuid)(unsigned int *eax, unsigned int *ebx, 72 unsigned long (*get_cpu_khz)(void);
79 unsigned int *ecx, unsigned int *edx); 73};
80 74
75struct pv_cpu_ops {
81 /* hooks for various privileged instructions */ 76 /* hooks for various privileged instructions */
82 unsigned long (*get_debugreg)(int regno); 77 unsigned long (*get_debugreg)(int regno);
83 void (*set_debugreg)(int regno, unsigned long value); 78 void (*set_debugreg)(int regno, unsigned long value);
@@ -87,41 +82,10 @@ struct paravirt_ops
87 unsigned long (*read_cr0)(void); 82 unsigned long (*read_cr0)(void);
88 void (*write_cr0)(unsigned long); 83 void (*write_cr0)(unsigned long);
89 84
90 unsigned long (*read_cr2)(void);
91 void (*write_cr2)(unsigned long);
92
93 unsigned long (*read_cr3)(void);
94 void (*write_cr3)(unsigned long);
95
96 unsigned long (*read_cr4_safe)(void); 85 unsigned long (*read_cr4_safe)(void);
97 unsigned long (*read_cr4)(void); 86 unsigned long (*read_cr4)(void);
98 void (*write_cr4)(unsigned long); 87 void (*write_cr4)(unsigned long);
99 88
100 /*
101 * Get/set interrupt state. save_fl and restore_fl are only
102 * expected to use X86_EFLAGS_IF; all other bits
103 * returned from save_fl are undefined, and may be ignored by
104 * restore_fl.
105 */
106 unsigned long (*save_fl)(void);
107 void (*restore_fl)(unsigned long);
108 void (*irq_disable)(void);
109 void (*irq_enable)(void);
110 void (*safe_halt)(void);
111 void (*halt)(void);
112
113 void (*wbinvd)(void);
114
115 /* MSR, PMC and TSR operations.
116 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
117 u64 (*read_msr)(unsigned int msr, int *err);
118 int (*write_msr)(unsigned int msr, u64 val);
119
120 u64 (*read_tsc)(void);
121 u64 (*read_pmc)(void);
122 unsigned long long (*sched_clock)(void);
123 unsigned long (*get_cpu_khz)(void);
124
125 /* Segment descriptor handling */ 89 /* Segment descriptor handling */
126 void (*load_tr_desc)(void); 90 void (*load_tr_desc)(void);
127 void (*load_gdt)(const struct Xgt_desc_struct *); 91 void (*load_gdt)(const struct Xgt_desc_struct *);
@@ -140,18 +104,47 @@ struct paravirt_ops
140 void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t); 104 void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
141 105
142 void (*set_iopl_mask)(unsigned mask); 106 void (*set_iopl_mask)(unsigned mask);
107
108 void (*wbinvd)(void);
143 void (*io_delay)(void); 109 void (*io_delay)(void);
144 110
111 /* cpuid emulation, mostly so that caps bits can be disabled */
112 void (*cpuid)(unsigned int *eax, unsigned int *ebx,
113 unsigned int *ecx, unsigned int *edx);
114
115 /* MSR, PMC and TSR operations.
116 err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
117 u64 (*read_msr)(unsigned int msr, int *err);
118 int (*write_msr)(unsigned int msr, u64 val);
119
120 u64 (*read_tsc)(void);
121 u64 (*read_pmc)(void);
122
123 /* These two are jmp to, not actually called. */
124 void (*irq_enable_sysexit)(void);
125 void (*iret)(void);
126
127 struct pv_lazy_ops lazy_mode;
128};
129
130struct pv_irq_ops {
131 void (*init_IRQ)(void);
132
145 /* 133 /*
146 * Hooks for intercepting the creation/use/destruction of an 134 * Get/set interrupt state. save_fl and restore_fl are only
147 * mm_struct. 135 * expected to use X86_EFLAGS_IF; all other bits
136 * returned from save_fl are undefined, and may be ignored by
137 * restore_fl.
148 */ 138 */
149 void (*activate_mm)(struct mm_struct *prev, 139 unsigned long (*save_fl)(void);
150 struct mm_struct *next); 140 void (*restore_fl)(unsigned long);
151 void (*dup_mmap)(struct mm_struct *oldmm, 141 void (*irq_disable)(void);
152 struct mm_struct *mm); 142 void (*irq_enable)(void);
153 void (*exit_mmap)(struct mm_struct *mm); 143 void (*safe_halt)(void);
144 void (*halt)(void);
145};
154 146
147struct pv_apic_ops {
155#ifdef CONFIG_X86_LOCAL_APIC 148#ifdef CONFIG_X86_LOCAL_APIC
156 /* 149 /*
157 * Direct APIC operations, principally for VMI. Ideally 150 * Direct APIC operations, principally for VMI. Ideally
@@ -167,6 +160,34 @@ struct paravirt_ops
167 unsigned long start_eip, 160 unsigned long start_eip,
168 unsigned long start_esp); 161 unsigned long start_esp);
169#endif 162#endif
163};
164
165struct pv_mmu_ops {
166 /*
167 * Called before/after init_mm pagetable setup. setup_start
168 * may reset %cr3, and may pre-install parts of the pagetable;
169 * pagetable setup is expected to preserve any existing
170 * mapping.
171 */
172 void (*pagetable_setup_start)(pgd_t *pgd_base);
173 void (*pagetable_setup_done)(pgd_t *pgd_base);
174
175 unsigned long (*read_cr2)(void);
176 void (*write_cr2)(unsigned long);
177
178 unsigned long (*read_cr3)(void);
179 void (*write_cr3)(unsigned long);
180
181 /*
182 * Hooks for intercepting the creation/use/destruction of an
183 * mm_struct.
184 */
185 void (*activate_mm)(struct mm_struct *prev,
186 struct mm_struct *next);
187 void (*dup_mmap)(struct mm_struct *oldmm,
188 struct mm_struct *mm);
189 void (*exit_mmap)(struct mm_struct *mm);
190
170 191
171 /* TLB operations */ 192 /* TLB operations */
172 void (*flush_tlb_user)(void); 193 void (*flush_tlb_user)(void);
@@ -191,15 +212,12 @@ struct paravirt_ops
191 void (*pte_update_defer)(struct mm_struct *mm, 212 void (*pte_update_defer)(struct mm_struct *mm,
192 unsigned long addr, pte_t *ptep); 213 unsigned long addr, pte_t *ptep);
193 214
194#ifdef CONFIG_HIGHPTE
195 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
196#endif
197
198#ifdef CONFIG_X86_PAE 215#ifdef CONFIG_X86_PAE
199 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 216 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
200 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); 217 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
218 pte_t *ptep, pte_t pte);
201 void (*set_pud)(pud_t *pudp, pud_t pudval); 219 void (*set_pud)(pud_t *pudp, pud_t pudval);
202 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 220 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
203 void (*pmd_clear)(pmd_t *pmdp); 221 void (*pmd_clear)(pmd_t *pmdp);
204 222
205 unsigned long long (*pte_val)(pte_t); 223 unsigned long long (*pte_val)(pte_t);
@@ -217,21 +235,40 @@ struct paravirt_ops
217 pgd_t (*make_pgd)(unsigned long pgd); 235 pgd_t (*make_pgd)(unsigned long pgd);
218#endif 236#endif
219 237
220 /* Set deferred update mode, used for batching operations. */ 238#ifdef CONFIG_HIGHPTE
221 void (*set_lazy_mode)(enum paravirt_lazy_mode mode); 239 void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
240#endif
222 241
223 /* These two are jmp to, not actually called. */ 242 struct pv_lazy_ops lazy_mode;
224 void (*irq_enable_sysexit)(void);
225 void (*iret)(void);
226}; 243};
227 244
228extern struct paravirt_ops paravirt_ops; 245/* This contains all the paravirt structures: we get a convenient
246 * number for each function using the offset which we use to indicate
247 * what to patch. */
248struct paravirt_patch_template
249{
250 struct pv_init_ops pv_init_ops;
251 struct pv_time_ops pv_time_ops;
252 struct pv_cpu_ops pv_cpu_ops;
253 struct pv_irq_ops pv_irq_ops;
254 struct pv_apic_ops pv_apic_ops;
255 struct pv_mmu_ops pv_mmu_ops;
256};
257
258extern struct pv_info pv_info;
259extern struct pv_init_ops pv_init_ops;
260extern struct pv_time_ops pv_time_ops;
261extern struct pv_cpu_ops pv_cpu_ops;
262extern struct pv_irq_ops pv_irq_ops;
263extern struct pv_apic_ops pv_apic_ops;
264extern struct pv_mmu_ops pv_mmu_ops;
229 265
230#define PARAVIRT_PATCH(x) \ 266#define PARAVIRT_PATCH(x) \
231 (offsetof(struct paravirt_ops, x) / sizeof(void *)) 267 (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
232 268
233#define paravirt_type(type) \ 269#define paravirt_type(op) \
234 [paravirt_typenum] "i" (PARAVIRT_PATCH(type)) 270 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
271 [paravirt_opptr] "m" (op)
235#define paravirt_clobber(clobber) \ 272#define paravirt_clobber(clobber) \
236 [paravirt_clobber] "i" (clobber) 273 [paravirt_clobber] "i" (clobber)
237 274
@@ -258,7 +295,7 @@ unsigned paravirt_patch_call(void *insnbuf,
258 const void *target, u16 tgt_clobbers, 295 const void *target, u16 tgt_clobbers,
259 unsigned long addr, u16 site_clobbers, 296 unsigned long addr, u16 site_clobbers,
260 unsigned len); 297 unsigned len);
261unsigned paravirt_patch_jmp(const void *target, void *insnbuf, 298unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
262 unsigned long addr, unsigned len); 299 unsigned long addr, unsigned len);
263unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, 300unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
264 unsigned long addr, unsigned len); 301 unsigned long addr, unsigned len);
@@ -271,14 +308,14 @@ int paravirt_disable_iospace(void);
271/* 308/*
272 * This generates an indirect call based on the operation type number. 309 * This generates an indirect call based on the operation type number.
273 * The type number, computed in PARAVIRT_PATCH, is derived from the 310 * The type number, computed in PARAVIRT_PATCH, is derived from the
274 * offset into the paravirt_ops structure, and can therefore be freely 311 * offset into the paravirt_patch_template structure, and can therefore be
275 * converted back into a structure offset. 312 * freely converted back into a structure offset.
276 */ 313 */
277#define PARAVIRT_CALL "call *(paravirt_ops+%c[paravirt_typenum]*4);" 314#define PARAVIRT_CALL "call *%[paravirt_opptr];"
278 315
279/* 316/*
280 * These macros are intended to wrap calls into a paravirt_ops 317 * These macros are intended to wrap calls through one of the paravirt
281 * operation, so that they can be later identified and patched at 318 * ops structs, so that they can be later identified and patched at
282 * runtime. 319 * runtime.
283 * 320 *
284 * Normally, a call to a pv_op function is a simple indirect call: 321 * Normally, a call to a pv_op function is a simple indirect call:
@@ -301,7 +338,7 @@ int paravirt_disable_iospace(void);
301 * The call instruction itself is marked by placing its start address 338 * The call instruction itself is marked by placing its start address
302 * and size into the .parainstructions section, so that 339 * and size into the .parainstructions section, so that
303 * apply_paravirt() in arch/i386/kernel/alternative.c can do the 340 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
304 * appropriate patching under the control of the backend paravirt_ops 341 * appropriate patching under the control of the backend pv_init_ops
305 * implementation. 342 * implementation.
306 * 343 *
307 * Unfortunately there's no way to get gcc to generate the args setup 344 * Unfortunately there's no way to get gcc to generate the args setup
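The PARAVIRT_PATCH trick described above, in isolation: a member's byte offset into the template struct, divided by the pointer size, yields a stable op number that can be converted back into the struct slot to patch. A two-member stand-in for the real paravirt_patch_template:

#include <stddef.h>
#include <stdio.h>

struct pv_template {
        void (*irq_disable)(void);
        void (*irq_enable)(void);
};

#define PARAVIRT_PATCH(member) \
        (offsetof(struct pv_template, member) / sizeof(void *))

int main(void)
{
        /* Op numbers are dense indices: 0, 1, ... in declaration order. */
        printf("irq_disable -> %zu, irq_enable -> %zu\n",
               PARAVIRT_PATCH(irq_disable), PARAVIRT_PATCH(irq_enable));
        return 0;
}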
@@ -409,36 +446,36 @@ int paravirt_disable_iospace(void);
409 446
410static inline int paravirt_enabled(void) 447static inline int paravirt_enabled(void)
411{ 448{
412 return paravirt_ops.paravirt_enabled; 449 return pv_info.paravirt_enabled;
413} 450}
414 451
415static inline void load_esp0(struct tss_struct *tss, 452static inline void load_esp0(struct tss_struct *tss,
416 struct thread_struct *thread) 453 struct thread_struct *thread)
417{ 454{
418 PVOP_VCALL2(load_esp0, tss, thread); 455 PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
419} 456}
420 457
421#define ARCH_SETUP paravirt_ops.arch_setup(); 458#define ARCH_SETUP pv_init_ops.arch_setup();
422static inline unsigned long get_wallclock(void) 459static inline unsigned long get_wallclock(void)
423{ 460{
424 return PVOP_CALL0(unsigned long, get_wallclock); 461 return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
425} 462}
426 463
427static inline int set_wallclock(unsigned long nowtime) 464static inline int set_wallclock(unsigned long nowtime)
428{ 465{
429 return PVOP_CALL1(int, set_wallclock, nowtime); 466 return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
430} 467}
431 468
432static inline void (*choose_time_init(void))(void) 469static inline void (*choose_time_init(void))(void)
433{ 470{
434 return paravirt_ops.time_init; 471 return pv_time_ops.time_init;
435} 472}
436 473
437/* The paravirtualized CPUID instruction. */ 474/* The paravirtualized CPUID instruction. */
438static inline void __cpuid(unsigned int *eax, unsigned int *ebx, 475static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
439 unsigned int *ecx, unsigned int *edx) 476 unsigned int *ecx, unsigned int *edx)
440{ 477{
441 PVOP_VCALL4(cpuid, eax, ebx, ecx, edx); 478 PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
442} 479}
443 480
444/* 481/*
@@ -446,87 +483,87 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
446 */ 483 */
447static inline unsigned long paravirt_get_debugreg(int reg) 484static inline unsigned long paravirt_get_debugreg(int reg)
448{ 485{
449 return PVOP_CALL1(unsigned long, get_debugreg, reg); 486 return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
450} 487}
451#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) 488#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
452static inline void set_debugreg(unsigned long val, int reg) 489static inline void set_debugreg(unsigned long val, int reg)
453{ 490{
454 PVOP_VCALL2(set_debugreg, reg, val); 491 PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
455} 492}
456 493
457static inline void clts(void) 494static inline void clts(void)
458{ 495{
459 PVOP_VCALL0(clts); 496 PVOP_VCALL0(pv_cpu_ops.clts);
460} 497}
461 498
462static inline unsigned long read_cr0(void) 499static inline unsigned long read_cr0(void)
463{ 500{
464 return PVOP_CALL0(unsigned long, read_cr0); 501 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
465} 502}
466 503
467static inline void write_cr0(unsigned long x) 504static inline void write_cr0(unsigned long x)
468{ 505{
469 PVOP_VCALL1(write_cr0, x); 506 PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
470} 507}
471 508
472static inline unsigned long read_cr2(void) 509static inline unsigned long read_cr2(void)
473{ 510{
474 return PVOP_CALL0(unsigned long, read_cr2); 511 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
475} 512}
476 513
477static inline void write_cr2(unsigned long x) 514static inline void write_cr2(unsigned long x)
478{ 515{
479 PVOP_VCALL1(write_cr2, x); 516 PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
480} 517}
481 518
482static inline unsigned long read_cr3(void) 519static inline unsigned long read_cr3(void)
483{ 520{
484 return PVOP_CALL0(unsigned long, read_cr3); 521 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
485} 522}
486 523
487static inline void write_cr3(unsigned long x) 524static inline void write_cr3(unsigned long x)
488{ 525{
489 PVOP_VCALL1(write_cr3, x); 526 PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
490} 527}
491 528
492static inline unsigned long read_cr4(void) 529static inline unsigned long read_cr4(void)
493{ 530{
494 return PVOP_CALL0(unsigned long, read_cr4); 531 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
495} 532}
496static inline unsigned long read_cr4_safe(void) 533static inline unsigned long read_cr4_safe(void)
497{ 534{
498 return PVOP_CALL0(unsigned long, read_cr4_safe); 535 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
499} 536}
500 537
501static inline void write_cr4(unsigned long x) 538static inline void write_cr4(unsigned long x)
502{ 539{
503 PVOP_VCALL1(write_cr4, x); 540 PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
504} 541}
505 542
506static inline void raw_safe_halt(void) 543static inline void raw_safe_halt(void)
507{ 544{
508 PVOP_VCALL0(safe_halt); 545 PVOP_VCALL0(pv_irq_ops.safe_halt);
509} 546}
510 547
511static inline void halt(void) 548static inline void halt(void)
512{ 549{
513 PVOP_VCALL0(safe_halt); 550 PVOP_VCALL0(pv_irq_ops.safe_halt);
514} 551}
515 552
516static inline void wbinvd(void) 553static inline void wbinvd(void)
517{ 554{
518 PVOP_VCALL0(wbinvd); 555 PVOP_VCALL0(pv_cpu_ops.wbinvd);
519} 556}
520 557
521#define get_kernel_rpl() (paravirt_ops.kernel_rpl) 558#define get_kernel_rpl() (pv_info.kernel_rpl)
522 559
523static inline u64 paravirt_read_msr(unsigned msr, int *err) 560static inline u64 paravirt_read_msr(unsigned msr, int *err)
524{ 561{
525 return PVOP_CALL2(u64, read_msr, msr, err); 562 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
526} 563}
527static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) 564static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
528{ 565{
529 return PVOP_CALL3(int, write_msr, msr, low, high); 566 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
530} 567}
531 568
532/* These should all do BUG_ON(_err), but our headers are too tangled. */ 569/* These should all do BUG_ON(_err), but our headers are too tangled. */
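
As the comment notes, the rdmsr-style convenience macros built on these wrappers currently throw the error away. For contrast, a caller that does honor paravirt_read_msr()'s error-return convention might look like this (illustrative only, not a macro from this header):

/* Illustrative caller: err is nonzero if the MSR access faulted. */
static inline u64 read_msr_or_default(unsigned msr, u64 def)
{
        int err;
        u64 val = paravirt_read_msr(msr, &err);

        return err ? def : val;
}
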
@@ -560,7 +597,7 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
560 597
561static inline u64 paravirt_read_tsc(void) 598static inline u64 paravirt_read_tsc(void)
562{ 599{
563 return PVOP_CALL0(u64, read_tsc); 600 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
564} 601}
565 602
566#define rdtscl(low) do { \ 603#define rdtscl(low) do { \
@@ -572,15 +609,15 @@ static inline u64 paravirt_read_tsc(void)
572 609
573static inline unsigned long long paravirt_sched_clock(void) 610static inline unsigned long long paravirt_sched_clock(void)
574{ 611{
575 return PVOP_CALL0(unsigned long long, sched_clock); 612 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
576} 613}
577#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) 614#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
578 615
579#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 616#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
580 617
581static inline unsigned long long paravirt_read_pmc(int counter) 618static inline unsigned long long paravirt_read_pmc(int counter)
582{ 619{
583 return PVOP_CALL1(u64, read_pmc, counter); 620 return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
584} 621}
585 622
586#define rdpmc(counter,low,high) do { \ 623#define rdpmc(counter,low,high) do { \
@@ -591,61 +628,61 @@ static inline unsigned long long paravirt_read_pmc(int counter)
591 628
592static inline void load_TR_desc(void) 629static inline void load_TR_desc(void)
593{ 630{
594 PVOP_VCALL0(load_tr_desc); 631 PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
595} 632}
596static inline void load_gdt(const struct Xgt_desc_struct *dtr) 633static inline void load_gdt(const struct Xgt_desc_struct *dtr)
597{ 634{
598 PVOP_VCALL1(load_gdt, dtr); 635 PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
599} 636}
600static inline void load_idt(const struct Xgt_desc_struct *dtr) 637static inline void load_idt(const struct Xgt_desc_struct *dtr)
601{ 638{
602 PVOP_VCALL1(load_idt, dtr); 639 PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
603} 640}
604static inline void set_ldt(const void *addr, unsigned entries) 641static inline void set_ldt(const void *addr, unsigned entries)
605{ 642{
606 PVOP_VCALL2(set_ldt, addr, entries); 643 PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
607} 644}
608static inline void store_gdt(struct Xgt_desc_struct *dtr) 645static inline void store_gdt(struct Xgt_desc_struct *dtr)
609{ 646{
610 PVOP_VCALL1(store_gdt, dtr); 647 PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
611} 648}
612static inline void store_idt(struct Xgt_desc_struct *dtr) 649static inline void store_idt(struct Xgt_desc_struct *dtr)
613{ 650{
614 PVOP_VCALL1(store_idt, dtr); 651 PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
615} 652}
616static inline unsigned long paravirt_store_tr(void) 653static inline unsigned long paravirt_store_tr(void)
617{ 654{
618 return PVOP_CALL0(unsigned long, store_tr); 655 return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
619} 656}
620#define store_tr(tr) ((tr) = paravirt_store_tr()) 657#define store_tr(tr) ((tr) = paravirt_store_tr())
621static inline void load_TLS(struct thread_struct *t, unsigned cpu) 658static inline void load_TLS(struct thread_struct *t, unsigned cpu)
622{ 659{
623 PVOP_VCALL2(load_tls, t, cpu); 660 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
624} 661}
625static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) 662static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
626{ 663{
627 PVOP_VCALL4(write_ldt_entry, dt, entry, low, high); 664 PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
628} 665}
629static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) 666static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
630{ 667{
631 PVOP_VCALL4(write_gdt_entry, dt, entry, low, high); 668 PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
632} 669}
633static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) 670static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
634{ 671{
635 PVOP_VCALL4(write_idt_entry, dt, entry, low, high); 672 PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
636} 673}
637static inline void set_iopl_mask(unsigned mask) 674static inline void set_iopl_mask(unsigned mask)
638{ 675{
639 PVOP_VCALL1(set_iopl_mask, mask); 676 PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
640} 677}
641 678
642/* The paravirtualized I/O functions */ 679/* The paravirtualized I/O functions */
643static inline void slow_down_io(void) { 680static inline void slow_down_io(void) {
644 paravirt_ops.io_delay(); 681 pv_cpu_ops.io_delay();
645#ifdef REALLY_SLOW_IO 682#ifdef REALLY_SLOW_IO
646 paravirt_ops.io_delay(); 683 pv_cpu_ops.io_delay();
647 paravirt_ops.io_delay(); 684 pv_cpu_ops.io_delay();
648 paravirt_ops.io_delay(); 685 pv_cpu_ops.io_delay();
649#endif 686#endif
650} 687}
651 688
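
slow_down_io() above simply defers to the backend's io_delay hook. On bare metal that hook is conventionally a single write to unused ISA port 0x80, which costs roughly a microsecond on the ISA bus; a sketch of such a native backend, assuming the traditional idiom:

/* Sketch of a native io_delay backend: the classic port-0x80 write. */
static void sketch_native_io_delay(void)
{
        asm volatile("outb %%al, $0x80" : : : "memory");
}
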
@@ -655,121 +692,120 @@ static inline void slow_down_io(void) {
655 */ 692 */
656static inline void apic_write(unsigned long reg, unsigned long v) 693static inline void apic_write(unsigned long reg, unsigned long v)
657{ 694{
658 PVOP_VCALL2(apic_write, reg, v); 695 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
659} 696}
660 697
661static inline void apic_write_atomic(unsigned long reg, unsigned long v) 698static inline void apic_write_atomic(unsigned long reg, unsigned long v)
662{ 699{
663 PVOP_VCALL2(apic_write_atomic, reg, v); 700 PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
664} 701}
665 702
666static inline unsigned long apic_read(unsigned long reg) 703static inline unsigned long apic_read(unsigned long reg)
667{ 704{
668 return PVOP_CALL1(unsigned long, apic_read, reg); 705 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
669} 706}
670 707
671static inline void setup_boot_clock(void) 708static inline void setup_boot_clock(void)
672{ 709{
673 PVOP_VCALL0(setup_boot_clock); 710 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
674} 711}
675 712
676static inline void setup_secondary_clock(void) 713static inline void setup_secondary_clock(void)
677{ 714{
678 PVOP_VCALL0(setup_secondary_clock); 715 PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
679} 716}
680#endif 717#endif
681 718
682static inline void paravirt_post_allocator_init(void) 719static inline void paravirt_post_allocator_init(void)
683{ 720{
684 if (paravirt_ops.post_allocator_init) 721 if (pv_init_ops.post_allocator_init)
685 (*paravirt_ops.post_allocator_init)(); 722 (*pv_init_ops.post_allocator_init)();
686} 723}
687 724
688static inline void paravirt_pagetable_setup_start(pgd_t *base) 725static inline void paravirt_pagetable_setup_start(pgd_t *base)
689{ 726{
690 if (paravirt_ops.pagetable_setup_start) 727 (*pv_mmu_ops.pagetable_setup_start)(base);
691 (*paravirt_ops.pagetable_setup_start)(base);
692} 728}
693 729
694static inline void paravirt_pagetable_setup_done(pgd_t *base) 730static inline void paravirt_pagetable_setup_done(pgd_t *base)
695{ 731{
696 if (paravirt_ops.pagetable_setup_done) 732 (*pv_mmu_ops.pagetable_setup_done)(base);
697 (*paravirt_ops.pagetable_setup_done)(base);
698} 733}
699 734
700#ifdef CONFIG_SMP 735#ifdef CONFIG_SMP
701static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, 736static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
702 unsigned long start_esp) 737 unsigned long start_esp)
703{ 738{
704 PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp); 739 PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
740 phys_apicid, start_eip, start_esp);
705} 741}
706#endif 742#endif
707 743
708static inline void paravirt_activate_mm(struct mm_struct *prev, 744static inline void paravirt_activate_mm(struct mm_struct *prev,
709 struct mm_struct *next) 745 struct mm_struct *next)
710{ 746{
711 PVOP_VCALL2(activate_mm, prev, next); 747 PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
712} 748}
713 749
714static inline void arch_dup_mmap(struct mm_struct *oldmm, 750static inline void arch_dup_mmap(struct mm_struct *oldmm,
715 struct mm_struct *mm) 751 struct mm_struct *mm)
716{ 752{
717 PVOP_VCALL2(dup_mmap, oldmm, mm); 753 PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
718} 754}
719 755
720static inline void arch_exit_mmap(struct mm_struct *mm) 756static inline void arch_exit_mmap(struct mm_struct *mm)
721{ 757{
722 PVOP_VCALL1(exit_mmap, mm); 758 PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
723} 759}
724 760
725static inline void __flush_tlb(void) 761static inline void __flush_tlb(void)
726{ 762{
727 PVOP_VCALL0(flush_tlb_user); 763 PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
728} 764}
729static inline void __flush_tlb_global(void) 765static inline void __flush_tlb_global(void)
730{ 766{
731 PVOP_VCALL0(flush_tlb_kernel); 767 PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
732} 768}
733static inline void __flush_tlb_single(unsigned long addr) 769static inline void __flush_tlb_single(unsigned long addr)
734{ 770{
735 PVOP_VCALL1(flush_tlb_single, addr); 771 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
736} 772}
737 773
738static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 774static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
739 unsigned long va) 775 unsigned long va)
740{ 776{
741 PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); 777 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
742} 778}
743 779
744static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn) 780static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
745{ 781{
746 PVOP_VCALL2(alloc_pt, mm, pfn); 782 PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
747} 783}
748static inline void paravirt_release_pt(unsigned pfn) 784static inline void paravirt_release_pt(unsigned pfn)
749{ 785{
750 PVOP_VCALL1(release_pt, pfn); 786 PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
751} 787}
752 788
753static inline void paravirt_alloc_pd(unsigned pfn) 789static inline void paravirt_alloc_pd(unsigned pfn)
754{ 790{
755 PVOP_VCALL1(alloc_pd, pfn); 791 PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
756} 792}
757 793
758static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn, 794static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
759 unsigned start, unsigned count) 795 unsigned start, unsigned count)
760{ 796{
761 PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count); 797 PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
762} 798}
763static inline void paravirt_release_pd(unsigned pfn) 799static inline void paravirt_release_pd(unsigned pfn)
764{ 800{
765 PVOP_VCALL1(release_pd, pfn); 801 PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
766} 802}
767 803
768#ifdef CONFIG_HIGHPTE 804#ifdef CONFIG_HIGHPTE
769static inline void *kmap_atomic_pte(struct page *page, enum km_type type) 805static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
770{ 806{
771 unsigned long ret; 807 unsigned long ret;
772 ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type); 808 ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
773 return (void *)ret; 809 return (void *)ret;
774} 810}
775#endif 811#endif
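
All of the MMU hooks in this hunk (TLB flushes, page-table page alloc/release, the HIGHPTE kmap) now hang off pv_mmu_ops, so a backend chooses its MMU behavior by populating that one struct. A self-contained toy of the wiring; every name below is hypothetical:

/* Toy backend wiring; all names here are hypothetical. */
struct toy_mmu_ops {
        void (*flush_tlb_user)(void);
        void (*flush_tlb_single)(unsigned long addr);
};

static void toy_flush_all(void)
{
        /* bare metal would, e.g., reload %cr3 here */
}

static void toy_flush_one(unsigned long addr)
{
        (void)addr;     /* bare metal would, e.g., invlpg here */
}

static struct toy_mmu_ops toy_mmu_ops = {
        .flush_tlb_user   = toy_flush_all,
        .flush_tlb_single = toy_flush_one,
};
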
@@ -777,162 +813,191 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
777static inline void pte_update(struct mm_struct *mm, unsigned long addr, 813static inline void pte_update(struct mm_struct *mm, unsigned long addr,
778 pte_t *ptep) 814 pte_t *ptep)
779{ 815{
780 PVOP_VCALL3(pte_update, mm, addr, ptep); 816 PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
781} 817}
782 818
783static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, 819static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
784 pte_t *ptep) 820 pte_t *ptep)
785{ 821{
786 PVOP_VCALL3(pte_update_defer, mm, addr, ptep); 822 PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
787} 823}
788 824
789#ifdef CONFIG_X86_PAE 825#ifdef CONFIG_X86_PAE
790static inline pte_t __pte(unsigned long long val) 826static inline pte_t __pte(unsigned long long val)
791{ 827{
792 unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte, 828 unsigned long long ret = PVOP_CALL2(unsigned long long,
829 pv_mmu_ops.make_pte,
793 val, val >> 32); 830 val, val >> 32);
794 return (pte_t) { ret, ret >> 32 }; 831 return (pte_t) { ret, ret >> 32 };
795} 832}
796 833
797static inline pmd_t __pmd(unsigned long long val) 834static inline pmd_t __pmd(unsigned long long val)
798{ 835{
799 return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) }; 836 return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
837 val, val >> 32) };
800} 838}
801 839
802static inline pgd_t __pgd(unsigned long long val) 840static inline pgd_t __pgd(unsigned long long val)
803{ 841{
804 return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) }; 842 return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
843 val, val >> 32) };
805} 844}
806 845
807static inline unsigned long long pte_val(pte_t x) 846static inline unsigned long long pte_val(pte_t x)
808{ 847{
809 return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high); 848 return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
849 x.pte_low, x.pte_high);
810} 850}
811 851
812static inline unsigned long long pmd_val(pmd_t x) 852static inline unsigned long long pmd_val(pmd_t x)
813{ 853{
814 return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32); 854 return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
855 x.pmd, x.pmd >> 32);
815} 856}
816 857
817static inline unsigned long long pgd_val(pgd_t x) 858static inline unsigned long long pgd_val(pgd_t x)
818{ 859{
819 return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32); 860 return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
861 x.pgd, x.pgd >> 32);
820} 862}
821 863
822static inline void set_pte(pte_t *ptep, pte_t pteval) 864static inline void set_pte(pte_t *ptep, pte_t pteval)
823{ 865{
824 PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high); 866 PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
825} 867}
826 868
827static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 869static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
828 pte_t *ptep, pte_t pteval) 870 pte_t *ptep, pte_t pteval)
829{ 871{
830 /* 5 arg words */ 872 /* 5 arg words */
831 paravirt_ops.set_pte_at(mm, addr, ptep, pteval); 873 pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
832} 874}
833 875
834static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) 876static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
835{ 877{
836 PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high); 878 PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
879 pteval.pte_low, pteval.pte_high);
837} 880}
838 881
839static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, 882static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
840 pte_t *ptep, pte_t pte) 883 pte_t *ptep, pte_t pte)
841{ 884{
842 /* 5 arg words */ 885 /* 5 arg words */
843 paravirt_ops.set_pte_present(mm, addr, ptep, pte); 886 pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
844} 887}
845 888
846static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) 889static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
847{ 890{
848 PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32); 891 PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
892 pmdval.pmd, pmdval.pmd >> 32);
849} 893}
850 894
851static inline void set_pud(pud_t *pudp, pud_t pudval) 895static inline void set_pud(pud_t *pudp, pud_t pudval)
852{ 896{
853 PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32); 897 PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
898 pudval.pgd.pgd, pudval.pgd.pgd >> 32);
854} 899}
855 900
856static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 901static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
857{ 902{
858 PVOP_VCALL3(pte_clear, mm, addr, ptep); 903 PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
859} 904}
860 905
861static inline void pmd_clear(pmd_t *pmdp) 906static inline void pmd_clear(pmd_t *pmdp)
862{ 907{
863 PVOP_VCALL1(pmd_clear, pmdp); 908 PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
864} 909}
865 910
866#else /* !CONFIG_X86_PAE */ 911#else /* !CONFIG_X86_PAE */
867 912
868static inline pte_t __pte(unsigned long val) 913static inline pte_t __pte(unsigned long val)
869{ 914{
870 return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) }; 915 return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
871} 916}
872 917
873static inline pgd_t __pgd(unsigned long val) 918static inline pgd_t __pgd(unsigned long val)
874{ 919{
875 return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) }; 920 return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
876} 921}
877 922
878static inline unsigned long pte_val(pte_t x) 923static inline unsigned long pte_val(pte_t x)
879{ 924{
880 return PVOP_CALL1(unsigned long, pte_val, x.pte_low); 925 return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
881} 926}
882 927
883static inline unsigned long pgd_val(pgd_t x) 928static inline unsigned long pgd_val(pgd_t x)
884{ 929{
885 return PVOP_CALL1(unsigned long, pgd_val, x.pgd); 930 return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
886} 931}
887 932
888static inline void set_pte(pte_t *ptep, pte_t pteval) 933static inline void set_pte(pte_t *ptep, pte_t pteval)
889{ 934{
890 PVOP_VCALL2(set_pte, ptep, pteval.pte_low); 935 PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
891} 936}
892 937
893static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 938static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
894 pte_t *ptep, pte_t pteval) 939 pte_t *ptep, pte_t pteval)
895{ 940{
896 PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low); 941 PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
897} 942}
898 943
899static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) 944static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
900{ 945{
901 PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd); 946 PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
902} 947}
903#endif /* CONFIG_X86_PAE */ 948#endif /* CONFIG_X86_PAE */
904 949
950/* Lazy mode for batching updates / context switch */
951enum paravirt_lazy_mode {
952 PARAVIRT_LAZY_NONE,
953 PARAVIRT_LAZY_MMU,
954 PARAVIRT_LAZY_CPU,
955};
956
957enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
958void paravirt_enter_lazy_cpu(void);
959void paravirt_leave_lazy_cpu(void);
960void paravirt_enter_lazy_mmu(void);
961void paravirt_leave_lazy_mmu(void);
962void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
963
905#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE 964#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
906static inline void arch_enter_lazy_cpu_mode(void) 965static inline void arch_enter_lazy_cpu_mode(void)
907{ 966{
908 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU); 967 PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
909} 968}
910 969
911static inline void arch_leave_lazy_cpu_mode(void) 970static inline void arch_leave_lazy_cpu_mode(void)
912{ 971{
913 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); 972 PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
914} 973}
915 974
916static inline void arch_flush_lazy_cpu_mode(void) 975static inline void arch_flush_lazy_cpu_mode(void)
917{ 976{
918 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); 977 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
978 arch_leave_lazy_cpu_mode();
979 arch_enter_lazy_cpu_mode();
980 }
919} 981}
920 982
921 983
922#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE 984#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
923static inline void arch_enter_lazy_mmu_mode(void) 985static inline void arch_enter_lazy_mmu_mode(void)
924{ 986{
925 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU); 987 PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
926} 988}
927 989
928static inline void arch_leave_lazy_mmu_mode(void) 990static inline void arch_leave_lazy_mmu_mode(void)
929{ 991{
930 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE); 992 PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
931} 993}
932 994
933static inline void arch_flush_lazy_mmu_mode(void) 995static inline void arch_flush_lazy_mmu_mode(void)
934{ 996{
935 PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH); 997 if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
998 arch_leave_lazy_mmu_mode();
999 arch_enter_lazy_mmu_mode();
1000 }
936} 1001}
937 1002
938void _paravirt_nop(void); 1003void _paravirt_nop(void);
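
The lazy-mode rework above replaces the old set_lazy_mode(PARAVIRT_LAZY_*) hook with per-struct enter/leave pairs, and the flush helpers become leave-then-re-enter, which forces a backend to commit whatever it queued. The intended batching pattern, sketched with the helpers this hunk defines (the loop body is illustrative):

/* Sketch: batch PTE updates under lazy MMU mode so a hypervisor
 * backend can queue them and commit once on leave. */
static void sketch_update_ptes(struct mm_struct *mm, unsigned long addr,
                               pte_t *ptep, pte_t pte, int n)
{
        int i;

        arch_enter_lazy_mmu_mode();
        for (i = 0; i < n; i++)
                set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, pte);
        arch_leave_lazy_mmu_mode();     /* queued updates committed here */
}
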
@@ -957,7 +1022,7 @@ static inline unsigned long __raw_local_save_flags(void)
957 PARAVIRT_CALL 1022 PARAVIRT_CALL
958 "popl %%edx; popl %%ecx") 1023 "popl %%edx; popl %%ecx")
959 : "=a"(f) 1024 : "=a"(f)
960 : paravirt_type(save_fl), 1025 : paravirt_type(pv_irq_ops.save_fl),
961 paravirt_clobber(CLBR_EAX) 1026 paravirt_clobber(CLBR_EAX)
962 : "memory", "cc"); 1027 : "memory", "cc");
963 return f; 1028 return f;
@@ -970,7 +1035,7 @@ static inline void raw_local_irq_restore(unsigned long f)
970 "popl %%edx; popl %%ecx") 1035 "popl %%edx; popl %%ecx")
971 : "=a"(f) 1036 : "=a"(f)
972 : "0"(f), 1037 : "0"(f),
973 paravirt_type(restore_fl), 1038 paravirt_type(pv_irq_ops.restore_fl),
974 paravirt_clobber(CLBR_EAX) 1039 paravirt_clobber(CLBR_EAX)
975 : "memory", "cc"); 1040 : "memory", "cc");
976} 1041}
@@ -981,7 +1046,7 @@ static inline void raw_local_irq_disable(void)
981 PARAVIRT_CALL 1046 PARAVIRT_CALL
982 "popl %%edx; popl %%ecx") 1047 "popl %%edx; popl %%ecx")
983 : 1048 :
984 : paravirt_type(irq_disable), 1049 : paravirt_type(pv_irq_ops.irq_disable),
985 paravirt_clobber(CLBR_EAX) 1050 paravirt_clobber(CLBR_EAX)
986 : "memory", "eax", "cc"); 1051 : "memory", "eax", "cc");
987} 1052}
@@ -992,7 +1057,7 @@ static inline void raw_local_irq_enable(void)
992 PARAVIRT_CALL 1057 PARAVIRT_CALL
993 "popl %%edx; popl %%ecx") 1058 "popl %%edx; popl %%ecx")
994 : 1059 :
995 : paravirt_type(irq_enable), 1060 : paravirt_type(pv_irq_ops.irq_enable),
996 paravirt_clobber(CLBR_EAX) 1061 paravirt_clobber(CLBR_EAX)
997 : "memory", "eax", "cc"); 1062 : "memory", "eax", "cc");
998} 1063}
@@ -1008,21 +1073,23 @@ static inline unsigned long __raw_local_irq_save(void)
1008 1073
1009#define CLI_STRING \ 1074#define CLI_STRING \
1010 _paravirt_alt("pushl %%ecx; pushl %%edx;" \ 1075 _paravirt_alt("pushl %%ecx; pushl %%edx;" \
1011 "call *paravirt_ops+%c[paravirt_cli_type]*4;" \ 1076 "call *%[paravirt_cli_opptr];" \
1012 "popl %%edx; popl %%ecx", \ 1077 "popl %%edx; popl %%ecx", \
1013 "%c[paravirt_cli_type]", "%c[paravirt_clobber]") 1078 "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
1014 1079
1015#define STI_STRING \ 1080#define STI_STRING \
1016 _paravirt_alt("pushl %%ecx; pushl %%edx;" \ 1081 _paravirt_alt("pushl %%ecx; pushl %%edx;" \
1017 "call *paravirt_ops+%c[paravirt_sti_type]*4;" \ 1082 "call *%[paravirt_sti_opptr];" \
1018 "popl %%edx; popl %%ecx", \ 1083 "popl %%edx; popl %%ecx", \
1019 "%c[paravirt_sti_type]", "%c[paravirt_clobber]") 1084 "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
1020 1085
1021#define CLI_STI_CLOBBERS , "%eax" 1086#define CLI_STI_CLOBBERS , "%eax"
1022#define CLI_STI_INPUT_ARGS \ 1087#define CLI_STI_INPUT_ARGS \
1023 , \ 1088 , \
1024 [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)), \ 1089 [paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \
1025 [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)), \ 1090 [paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable), \
1091 [paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \
1092 [paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable), \
1026 paravirt_clobber(CLBR_EAX) 1093 paravirt_clobber(CLBR_EAX)
1027 1094
1028/* Make sure as little as possible of this mess escapes. */ 1095/* Make sure as little as possible of this mess escapes. */
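
Note the shape change in CLI_STRING and STI_STRING: with one big paravirt_ops the call target could be computed at assembly time as paravirt_ops plus type*4, but with several structs the function pointer itself is handed to the asm as an "m" operand and called through memory. The core of that idiom in isolation (illustrative gcc inline asm for i386):

/* Illustrative only: indirect call through a memory operand, the
 * same shape as the reworked CLI/STI strings. */
static void (*sketch_irq_disable)(void);

static inline void sketch_cli(void)
{
        asm volatile("call *%[op]"
                     : /* no outputs */
                     : [op] "m" (sketch_irq_disable)
                     : "memory", "eax", "ecx", "edx", "cc");
}
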
@@ -1042,7 +1109,7 @@ static inline unsigned long __raw_local_irq_save(void)
1042 1109
1043#else /* __ASSEMBLY__ */ 1110#else /* __ASSEMBLY__ */
1044 1111
1045#define PARA_PATCH(off) ((off) / 4) 1112#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
1046 1113
1047#define PARA_SITE(ptype, clobbers, ops) \ 1114#define PARA_SITE(ptype, clobbers, ops) \
1048771:; \ 1115
@@ -1055,29 +1122,29 @@ static inline unsigned long __raw_local_irq_save(void)
1055 .short clobbers; \ 1122 .short clobbers; \
1056 .popsection 1123 .popsection
1057 1124
1058#define INTERRUPT_RETURN \ 1125#define INTERRUPT_RETURN \
1059 PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE, \ 1126 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
1060 jmp *%cs:paravirt_ops+PARAVIRT_iret) 1127 jmp *%cs:pv_cpu_ops+PV_CPU_iret)
1061 1128
1062#define DISABLE_INTERRUPTS(clobbers) \ 1129#define DISABLE_INTERRUPTS(clobbers) \
1063 PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers, \ 1130 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
1064 pushl %eax; pushl %ecx; pushl %edx; \ 1131 pushl %eax; pushl %ecx; pushl %edx; \
1065 call *%cs:paravirt_ops+PARAVIRT_irq_disable; \ 1132 call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \
1066 popl %edx; popl %ecx; popl %eax) \ 1133 popl %edx; popl %ecx; popl %eax) \
1067 1134
1068#define ENABLE_INTERRUPTS(clobbers) \ 1135#define ENABLE_INTERRUPTS(clobbers) \
1069 PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers, \ 1136 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
1070 pushl %eax; pushl %ecx; pushl %edx; \ 1137 pushl %eax; pushl %ecx; pushl %edx; \
1071 call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ 1138 call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \
1072 popl %edx; popl %ecx; popl %eax) 1139 popl %edx; popl %ecx; popl %eax)
1073 1140
1074#define ENABLE_INTERRUPTS_SYSEXIT \ 1141#define ENABLE_INTERRUPTS_SYSEXIT \
1075 PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE, \ 1142 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
1076 jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) 1143 jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
1077 1144
1078#define GET_CR0_INTO_EAX \ 1145#define GET_CR0_INTO_EAX \
1079 push %ecx; push %edx; \ 1146 push %ecx; push %edx; \
1080 call *paravirt_ops+PARAVIRT_read_cr0; \ 1147 call *pv_cpu_ops+PV_CPU_read_cr0; \
1081 pop %edx; pop %ecx 1148 pop %edx; pop %ecx
1082 1149
1083#endif /* __ASSEMBLY__ */ 1150#endif /* __ASSEMBLY__ */
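
On the assembly side, PARA_PATCH() now composes a per-struct base (PARAVIRT_PATCH_pv_cpu_ops and friends) with a member offset (PV_CPU_iret and friends). Those constants are presumably generated through the usual asm-offsets mechanism; the idiom below is standard, though the specific OFFSET() lines are an assumption about this tree's asm-offsets_32.c:

/* Standard asm-offsets idiom; offsetof comes from <linux/stddef.h>.
 * The pv-specific OFFSET() lines are assumed, not verified. */
#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem))

/* e.g.:
 * OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
 * OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 */
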
diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h
index c0df89f66e8b..448ac9516314 100644
--- a/include/asm-x86/pgtable-3level-defs.h
+++ b/include/asm-x86/pgtable-3level-defs.h
@@ -2,7 +2,7 @@
2#define _I386_PGTABLE_3LEVEL_DEFS_H 2#define _I386_PGTABLE_3LEVEL_DEFS_H
3 3
4#ifdef CONFIG_PARAVIRT 4#ifdef CONFIG_PARAVIRT
5#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd) 5#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd)
6#else 6#else
7#define SHARED_KERNEL_PMD 1 7#define SHARED_KERNEL_PMD 1
8#endif 8#endif
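
SHARED_KERNEL_PMD now reads a field of pv_info, the small identity-and-features struct split out of the old paravirt_ops. Its approximate shape, pieced together from the paravirt.h hunks above (ordering and completeness not guaranteed):

/* Approximate shape of pv_info; see the paravirt.h changes above. */
struct pv_info {
        unsigned int kernel_rpl;
        int shared_kernel_pmd;
        int paravirt_enabled;
        const char *name;
};
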
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index ff61ea365997..b05d8a6d9143 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -160,8 +160,9 @@ struct vcpu_set_singleshot_timer {
160 */ 160 */
161#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ 161#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */
162struct vcpu_register_vcpu_info { 162struct vcpu_register_vcpu_info {
163 uint32_t mfn; /* mfn of page to place vcpu_info */ 163 uint64_t mfn; /* mfn of page to place vcpu_info */
164 uint32_t offset; /* offset within page */ 164 uint32_t offset; /* offset within page */
165 uint32_t rsvd; /* unused */
165}; 166};
166 167
167#endif /* __XEN_PUBLIC_VCPU_H__ */ 168#endif /* __XEN_PUBLIC_VCPU_H__ */
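
Widening mfn to uint64_t and adding an explicit rsvd word keeps struct vcpu_register_vcpu_info at 16 bytes with an identical layout for 32- and 64-bit guests, so a single hypercall ABI serves both. An illustrative BUILD_BUG_ON-style compile-time check of that invariant:

/* Illustrative layout check: fails to compile if the size drifts. */
typedef char vcpu_register_vcpu_info_is_16_bytes
        [sizeof(struct vcpu_register_vcpu_info) == 16 ? 1 : -1];
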
diff --git a/mm/Kconfig b/mm/Kconfig
index 1cc6cada2bbf..b1f03b0eb7f1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -155,7 +155,6 @@ config SPLIT_PTLOCK_CPUS
155 int 155 int
156 default "4096" if ARM && !CPU_CACHE_VIPT 156 default "4096" if ARM && !CPU_CACHE_VIPT
157 default "4096" if PARISC && !PA20 157 default "4096" if PARISC && !PA20
158 default "4096" if XEN
159 default "4" 158 default "4"
160 159
161# 160#