author	Tony Lindgren <tony@atomide.com>	2011-05-24 03:45:06 -0400
committer	Tony Lindgren <tony@atomide.com>	2011-05-24 03:45:06 -0400
commit	9b28b11e2a648f07c8481b9666ccf1c088e1ab74 (patch)
tree	ac3db2d4ae69e393d8423bb8c9304c75023dc805 /arch
parent	b7679ab3f70482ff4b75a8c735c8224ebedb6020 (diff)
parent	99aa18278e867574d72201b806f82ace07d4804b (diff)
Merge branch 'for_2.6.40/pm-cleanup' of ssh://master.kernel.org/pub/scm/linux/kernel/git/khilman/linux-omap-pm into omap-for-linus
Diffstat (limited to 'arch')
-rw-r--r--	arch/alpha/include/asm/unistd.h	6
-rw-r--r--	arch/alpha/kernel/systbls.S	12
-rw-r--r--	arch/alpha/kernel/time.c	3
-rw-r--r--	arch/arm/mach-omap2/board-3430sdp.c	19
-rw-r--r--	arch/arm/mach-omap2/board-rx51.c	18
-rw-r--r--	arch/arm/mach-omap2/cpuidle34xx.c	436
-rw-r--r--	arch/arm/mach-omap2/pm.h	17
-rw-r--r--	arch/arm/mach-omap2/pm34xx.c	14
-rw-r--r--	arch/mips/ar7/gpio.c	4
-rw-r--r--	arch/mips/include/asm/dma-mapping.h	2
-rw-r--r--	arch/mips/kernel/traps.c	6
-rw-r--r--	arch/mips/rb532/gpio.c	2
-rw-r--r--	arch/powerpc/platforms/83xx/suspend.c	7
-rw-r--r--	arch/powerpc/sysdev/fsl_msi.c	7
-rw-r--r--	arch/sparc/kernel/apc.c	2
-rw-r--r--	arch/sparc/kernel/pci_sabre.c	5
-rw-r--r--	arch/sparc/kernel/pci_schizo.c	8
-rw-r--r--	arch/sparc/kernel/pmc.c	2
-rw-r--r--	arch/sparc/kernel/smp_32.c	10
-rw-r--r--	arch/sparc/kernel/time_32.c	2
-rw-r--r--	arch/sparc/lib/checksum_32.S	12
-rw-r--r--	arch/um/os-Linux/util.c	23
-rw-r--r--	arch/x86/include/asm/apicdef.h	1
-rw-r--r--	arch/x86/include/asm/pgtable_types.h	1
-rw-r--r--	arch/x86/include/asm/uv/uv_bau.h	17
-rw-r--r--	arch/x86/include/asm/uv/uv_hub.h	2
-rw-r--r--	arch/x86/include/asm/uv/uv_mmrs.h	16
-rw-r--r--	arch/x86/include/asm/x86_init.h	12
-rw-r--r--	arch/x86/kernel/apic/x2apic_uv_x.c	48
-rw-r--r--	arch/x86/kernel/cpu/amd.c	4
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_amd.c	1
-rw-r--r--	arch/x86/kernel/cpu/mcheck/therm_throt.c	12
-rw-r--r--	arch/x86/kernel/kprobes.c	5
-rw-r--r--	arch/x86/kernel/x86_init.c	4
-rw-r--r--	arch/x86/mm/init.c	24
-rw-r--r--	arch/x86/platform/uv/tlb_uv.c	92
-rw-r--r--	arch/x86/xen/mmu.c	138
37 files changed, 451 insertions, 543 deletions
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937bf5a77..b1834166922d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,14 @@
 #define __NR_fanotify_init		494
 #define __NR_fanotify_mark		495
 #define __NR_prlimit64			496
+#define __NR_name_to_handle_at		497
+#define __NR_open_by_handle_at		498
+#define __NR_clock_adjtime		499
+#define __NR_syncfs			500
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			497
+#define NR_SYSCALLS			501
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9db16f..15f999d41c75 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -498,23 +498,27 @@ sys_call_table:
 	.quad sys_ni_syscall			/* sys_timerfd */
 	.quad sys_eventfd
 	.quad sys_recvmmsg
 	.quad sys_fallocate			/* 480 */
 	.quad sys_timerfd_create
 	.quad sys_timerfd_settime
 	.quad sys_timerfd_gettime
 	.quad sys_signalfd4
 	.quad sys_eventfd2			/* 485 */
 	.quad sys_epoll_create1
 	.quad sys_dup3
 	.quad sys_pipe2
 	.quad sys_inotify_init1
 	.quad sys_preadv			/* 490 */
 	.quad sys_pwritev
 	.quad sys_rt_tgsigqueueinfo
 	.quad sys_perf_event_open
 	.quad sys_fanotify_init
 	.quad sys_fanotify_mark			/* 495 */
 	.quad sys_prlimit64
+	.quad sys_name_to_handle_at
+	.quad sys_open_by_handle_at
+	.quad sys_clock_adjtime
+	.quad sys_syncfs			/* 500 */
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
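The four new table slots above pair with the __NR_ numbers added to unistd.h (497-500), and NR_SYSCALLS moves to 501 so the numbering and the table length stay in sync. As a purely illustrative sketch (not part of this merge; the descriptor name is hypothetical), the new Alpha syncfs slot could be exercised from user space through syscall(2):

	/* Illustrative only: call Alpha syscall 500 (__NR_syncfs) directly. */
	#include <unistd.h>
	#include <sys/syscall.h>

	static int do_syncfs(int fd)	/* fd: any open descriptor on the target filesystem */
	{
		return syscall(500 /* __NR_syncfs on Alpha */, fd);
	}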
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 918e8e0b72ff..818e74ed45dc 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = {
 
 static inline void register_rpcc_clocksource(long cycle_freq)
 {
-	clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
-	clocksource_register(&clocksource_rpcc);
+	clocksource_register_hz(&clocksource_rpcc, cycle_freq);
 }
 #else /* !CONFIG_SMP */
 static inline void register_rpcc_clocksource(long cycle_freq)
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 99218a5299ca..52dbdf3ab66c 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -59,24 +59,6 @@
 
 #define TWL4030_MSECURE_GPIO 22
 
-/* FIXME: These values need to be updated based on more profiling on 3430sdp*/
-static struct cpuidle_params omap3_cpuidle_params_table[] = {
-	/* C1 */
-	{1, 2, 2, 5},
-	/* C2 */
-	{1, 10, 10, 30},
-	/* C3 */
-	{1, 50, 50, 300},
-	/* C4 */
-	{1, 1500, 1800, 4000},
-	/* C5 */
-	{1, 2500, 7500, 12000},
-	/* C6 */
-	{1, 3000, 8500, 15000},
-	/* C7 */
-	{1, 10000, 30000, 300000},
-};
-
 static uint32_t board_keymap[] = {
 	KEY(0, 0, KEY_LEFT),
 	KEY(0, 1, KEY_RIGHT),
@@ -800,7 +782,6 @@ static void __init omap_3430sdp_init(void)
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
 	omap_board_config = sdp3430_config;
 	omap_board_config_size = ARRAY_SIZE(sdp3430_config);
-	omap3_pm_init_cpuidle(omap3_cpuidle_params_table);
 	omap3430_i2c_init();
 	omap_display_init(&sdp3430_dss_data);
 	if (omap_rev() > OMAP3430_REV_ES1_0)
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index f8ba20a14e62..fec4cac8fa0a 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -58,21 +58,25 @@ static struct platform_device leds_gpio = {
 	},
 };
 
+/*
+ * cpuidle C-states definition override from the default values.
+ * The 'exit_latency' field is the sum of sleep and wake-up latencies.
+ */
 static struct cpuidle_params rx51_cpuidle_params[] = {
 	/* C1 */
-	{1, 110, 162, 5},
+	{110 + 162, 5 , 1},
 	/* C2 */
-	{1, 106, 180, 309},
+	{106 + 180, 309, 1},
 	/* C3 */
-	{0, 107, 410, 46057},
+	{107 + 410, 46057, 0},
 	/* C4 */
-	{0, 121, 3374, 46057},
+	{121 + 3374, 46057, 0},
 	/* C5 */
-	{1, 855, 1146, 46057},
+	{855 + 1146, 46057, 1},
 	/* C6 */
-	{0, 7580, 4134, 484329},
+	{7580 + 4134, 484329, 0},
 	/* C7 */
-	{1, 7505, 15274, 484329},
+	{7505 + 15274, 484329, 1},
 };
 
 static struct omap_lcd_config rx51_lcd_config = {
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index 1c240eff3918..4bf6e6e8b100 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -36,36 +36,6 @@
 
 #ifdef CONFIG_CPU_IDLE
 
-#define OMAP3_MAX_STATES 7
-#define OMAP3_STATE_C1 0 /* C1 - MPU WFI + Core active */
-#define OMAP3_STATE_C2 1 /* C2 - MPU WFI + Core inactive */
-#define OMAP3_STATE_C3 2 /* C3 - MPU CSWR + Core inactive */
-#define OMAP3_STATE_C4 3 /* C4 - MPU OFF + Core iactive */
-#define OMAP3_STATE_C5 4 /* C5 - MPU RET + Core RET */
-#define OMAP3_STATE_C6 5 /* C6 - MPU OFF + Core RET */
-#define OMAP3_STATE_C7 6 /* C7 - MPU OFF + Core OFF */
-
-#define OMAP3_STATE_MAX OMAP3_STATE_C7
-
-#define CPUIDLE_FLAG_CHECK_BM	0x10000	/* use omap3_enter_idle_bm() */
-
-struct omap3_processor_cx {
-	u8 valid;
-	u8 type;
-	u32 sleep_latency;
-	u32 wakeup_latency;
-	u32 mpu_state;
-	u32 core_state;
-	u32 threshold;
-	u32 flags;
-	const char *desc;
-};
-
-struct omap3_processor_cx omap3_power_states[OMAP3_MAX_STATES];
-struct omap3_processor_cx current_cx_state;
-struct powerdomain *mpu_pd, *core_pd, *per_pd;
-struct powerdomain *cam_pd;
-
 /*
  * The latencies/thresholds for various C states have
  * to be configured from the respective board files.
@@ -75,27 +45,31 @@ struct powerdomain *cam_pd;
  */
 static struct cpuidle_params cpuidle_params_table[] = {
 	/* C1 */
-	{1, 2, 2, 5},
+	{2 + 2, 5, 1},
 	/* C2 */
-	{1, 10, 10, 30},
+	{10 + 10, 30, 1},
 	/* C3 */
-	{1, 50, 50, 300},
+	{50 + 50, 300, 1},
 	/* C4 */
-	{1, 1500, 1800, 4000},
+	{1500 + 1800, 4000, 1},
 	/* C5 */
-	{1, 2500, 7500, 12000},
+	{2500 + 7500, 12000, 1},
 	/* C6 */
-	{1, 3000, 8500, 15000},
+	{3000 + 8500, 15000, 1},
 	/* C7 */
-	{1, 10000, 30000, 300000},
+	{10000 + 30000, 300000, 1},
 };
+#define OMAP3_NUM_STATES ARRAY_SIZE(cpuidle_params_table)
 
-static int omap3_idle_bm_check(void)
-{
-	if (!omap3_can_sleep())
-		return 1;
-	return 0;
-}
+/* Mach specific information to be recorded in the C-state driver_data */
+struct omap3_idle_statedata {
+	u32 mpu_state;
+	u32 core_state;
+	u8 valid;
+};
+struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES];
+
+struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd;
 
 static int _cpuidle_allow_idle(struct powerdomain *pwrdm,
 				struct clockdomain *clkdm)
@@ -122,12 +96,10 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm,
 static int omap3_enter_idle(struct cpuidle_device *dev,
 			struct cpuidle_state *state)
 {
-	struct omap3_processor_cx *cx = cpuidle_get_statedata(state);
+	struct omap3_idle_statedata *cx = cpuidle_get_statedata(state);
 	struct timespec ts_preidle, ts_postidle, ts_idle;
 	u32 mpu_state = cx->mpu_state, core_state = cx->core_state;
 
-	current_cx_state = *cx;
-
 	/* Used to keep track of the total time in idle */
 	getnstimeofday(&ts_preidle);
 
@@ -140,7 +112,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
 	if (omap_irq_pending() || need_resched())
 		goto return_sleep_time;
 
-	if (cx->type == OMAP3_STATE_C1) {
+	/* Deny idle for C1 */
+	if (state == &dev->states[0]) {
 		pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle);
 		pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle);
 	}
@@ -148,7 +121,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev,
 	/* Execute ARM wfi */
 	omap_sram_idle();
 
-	if (cx->type == OMAP3_STATE_C1) {
+	/* Re-allow idle for C1 */
+	if (state == &dev->states[0]) {
 		pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle);
 		pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle);
 	}
@@ -164,41 +138,53 @@ return_sleep_time:
 }
 
 /**
- * next_valid_state - Find next valid c-state
+ * next_valid_state - Find next valid C-state
  * @dev: cpuidle device
- * @state: Currently selected c-state
+ * @state: Currently selected C-state
  *
  * If the current state is valid, it is returned back to the caller.
  * Else, this function searches for a lower c-state which is still
- * valid (as defined in omap3_power_states[]).
+ * valid.
+ *
+ * A state is valid if the 'valid' field is enabled and
+ * if it satisfies the enable_off_mode condition.
  */
 static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
 					      struct cpuidle_state *curr)
 {
 	struct cpuidle_state *next = NULL;
-	struct omap3_processor_cx *cx;
+	struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr);
+	u32 mpu_deepest_state = PWRDM_POWER_RET;
+	u32 core_deepest_state = PWRDM_POWER_RET;
 
-	cx = (struct omap3_processor_cx *)cpuidle_get_statedata(curr);
+	if (enable_off_mode) {
+		mpu_deepest_state = PWRDM_POWER_OFF;
+		/*
+		 * Erratum i583: valable for ES rev < Es1.2 on 3630.
+		 * CORE OFF mode is not supported in a stable form, restrict
+		 * instead the CORE state to RET.
+		 */
+		if (!IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583))
+			core_deepest_state = PWRDM_POWER_OFF;
+	}
 
 	/* Check if current state is valid */
-	if (cx->valid) {
+	if ((cx->valid) &&
+	    (cx->mpu_state >= mpu_deepest_state) &&
+	    (cx->core_state >= core_deepest_state)) {
 		return curr;
 	} else {
-		u8 idx = OMAP3_STATE_MAX;
+		int idx = OMAP3_NUM_STATES - 1;
 
-		/*
-		 * Reach the current state starting at highest C-state
-		 */
-		for (; idx >= OMAP3_STATE_C1; idx--) {
+		/* Reach the current state starting at highest C-state */
+		for (; idx >= 0; idx--) {
 			if (&dev->states[idx] == curr) {
 				next = &dev->states[idx];
 				break;
 			}
 		}
 
-		/*
-		 * Should never hit this condition.
-		 */
+		/* Should never hit this condition */
 		WARN_ON(next == NULL);
 
 		/*
@@ -206,17 +192,17 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
 		 * Start search from the next (lower) state.
 		 */
 		idx--;
-		for (; idx >= OMAP3_STATE_C1; idx--) {
-			struct omap3_processor_cx *cx;
-
+		for (; idx >= 0; idx--) {
 			cx = cpuidle_get_statedata(&dev->states[idx]);
-			if (cx->valid) {
+			if ((cx->valid) &&
+			    (cx->mpu_state >= mpu_deepest_state) &&
+			    (cx->core_state >= core_deepest_state)) {
 				next = &dev->states[idx];
 				break;
 			}
 		}
 		/*
-		 * C1 and C2 are always valid.
+		 * C1 is always valid.
 		 * So, no need to check for 'next==NULL' outside this loop.
 		 */
 	}
@@ -229,36 +215,22 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev,
  * @dev: cpuidle device
  * @state: The target state to be programmed
  *
- * Used for C states with CPUIDLE_FLAG_CHECK_BM flag set. This
- * function checks for any pending activity and then programs the
- * device to the specified or a safer state.
+ * This function checks for any pending activity and then programs
+ * the device to the specified or a safer state.
  */
 static int omap3_enter_idle_bm(struct cpuidle_device *dev,
 			       struct cpuidle_state *state)
 {
-	struct cpuidle_state *new_state = next_valid_state(dev, state);
-	u32 core_next_state, per_next_state = 0, per_saved_state = 0;
-	u32 cam_state;
-	struct omap3_processor_cx *cx;
+	struct cpuidle_state *new_state;
+	u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state;
+	struct omap3_idle_statedata *cx;
 	int ret;
 
-	if ((state->flags & CPUIDLE_FLAG_CHECK_BM) && omap3_idle_bm_check()) {
-		BUG_ON(!dev->safe_state);
+	if (!omap3_can_sleep()) {
 		new_state = dev->safe_state;
 		goto select_state;
 	}
 
-	cx = cpuidle_get_statedata(state);
-	core_next_state = cx->core_state;
-
-	/*
-	 * FIXME: we currently manage device-specific idle states
-	 * for PER and CORE in combination with CPU-specific
-	 * idle states. This is wrong, and device-specific
-	 * idle management needs to be separated out into
-	 * its own code.
-	 */
-
 	/*
 	 * Prevent idle completely if CAM is active.
 	 * CAM does not have wakeup capability in OMAP3.
@@ -270,9 +242,19 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev,
 	}
 
 	/*
+	 * FIXME: we currently manage device-specific idle states
+	 * for PER and CORE in combination with CPU-specific
+	 * idle states. This is wrong, and device-specific
+	 * idle management needs to be separated out into
+	 * its own code.
+	 */
+
+	/*
 	 * Prevent PER off if CORE is not in retention or off as this
 	 * would disable PER wakeups completely.
 	 */
+	cx = cpuidle_get_statedata(state);
+	core_next_state = cx->core_state;
 	per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd);
 	if ((per_next_state == PWRDM_POWER_OFF) &&
 	    (core_next_state > PWRDM_POWER_RET))
@@ -282,6 +264,8 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev,
 	if (per_next_state != per_saved_state)
 		pwrdm_set_next_pwrst(per_pd, per_next_state);
 
+	new_state = next_valid_state(dev, state);
+
 select_state:
 	dev->last_state = new_state;
 	ret = omap3_enter_idle(dev, new_state);
@@ -295,31 +279,6 @@ select_state:
 
 DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev);
 
-/**
- * omap3_cpuidle_update_states() - Update the cpuidle states
- * @mpu_deepest_state:	Enable states up to and including this for mpu domain
- * @core_deepest_state:	Enable states up to and including this for core domain
- *
- * This goes through the list of states available and enables and disables the
- * validity of C states based on deepest state that can be achieved for the
- * variable domain
- */
-void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state)
-{
-	int i;
-
-	for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
-		struct omap3_processor_cx *cx = &omap3_power_states[i];
-
-		if ((cx->mpu_state >= mpu_deepest_state) &&
-		    (cx->core_state >= core_deepest_state)) {
-			cx->valid = 1;
-		} else {
-			cx->valid = 0;
-		}
-	}
-}
-
 void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params)
 {
 	int i;
@@ -327,212 +286,109 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params)
 	if (!cpuidle_board_params)
 		return;
 
-	for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
-		cpuidle_params_table[i].valid =
-			cpuidle_board_params[i].valid;
-		cpuidle_params_table[i].sleep_latency =
-			cpuidle_board_params[i].sleep_latency;
-		cpuidle_params_table[i].wake_latency =
-			cpuidle_board_params[i].wake_latency;
-		cpuidle_params_table[i].threshold =
-			cpuidle_board_params[i].threshold;
+	for (i = 0; i < OMAP3_NUM_STATES; i++) {
+		cpuidle_params_table[i].valid = cpuidle_board_params[i].valid;
+		cpuidle_params_table[i].exit_latency =
+			cpuidle_board_params[i].exit_latency;
+		cpuidle_params_table[i].target_residency =
+			cpuidle_board_params[i].target_residency;
 	}
 	return;
 }
 
-/* omap3_init_power_states - Initialises the OMAP3 specific C states.
- *
- * Below is the desciption of each C state.
- * 	C1 . MPU WFI + Core active
- *	C2 . MPU WFI + Core inactive
- *	C3 . MPU CSWR + Core inactive
- *	C4 . MPU OFF + Core inactive
- *	C5 . MPU CSWR + Core CSWR
- *	C6 . MPU OFF + Core CSWR
- *	C7 . MPU OFF + Core OFF
- */
-void omap_init_power_states(void)
-{
-	/* C1 . MPU WFI + Core active */
-	omap3_power_states[OMAP3_STATE_C1].valid =
-		cpuidle_params_table[OMAP3_STATE_C1].valid;
-	omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1;
-	omap3_power_states[OMAP3_STATE_C1].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C1].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C1].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C1].wake_latency;
-	omap3_power_states[OMAP3_STATE_C1].threshold =
-		cpuidle_params_table[OMAP3_STATE_C1].threshold;
-	omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID;
-	omap3_power_states[OMAP3_STATE_C1].desc = "MPU ON + CORE ON";
-
-	/* C2 . MPU WFI + Core inactive */
-	omap3_power_states[OMAP3_STATE_C2].valid =
-		cpuidle_params_table[OMAP3_STATE_C2].valid;
-	omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2;
-	omap3_power_states[OMAP3_STATE_C2].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C2].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C2].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C2].wake_latency;
-	omap3_power_states[OMAP3_STATE_C2].threshold =
-		cpuidle_params_table[OMAP3_STATE_C2].threshold;
-	omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C2].desc = "MPU ON + CORE ON";
-
-	/* C3 . MPU CSWR + Core inactive */
-	omap3_power_states[OMAP3_STATE_C3].valid =
-		cpuidle_params_table[OMAP3_STATE_C3].valid;
-	omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3;
-	omap3_power_states[OMAP3_STATE_C3].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C3].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C3].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C3].wake_latency;
-	omap3_power_states[OMAP3_STATE_C3].threshold =
-		cpuidle_params_table[OMAP3_STATE_C3].threshold;
-	omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET;
-	omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C3].desc = "MPU RET + CORE ON";
-
-	/* C4 . MPU OFF + Core inactive */
-	omap3_power_states[OMAP3_STATE_C4].valid =
-		cpuidle_params_table[OMAP3_STATE_C4].valid;
-	omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4;
-	omap3_power_states[OMAP3_STATE_C4].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C4].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C4].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C4].wake_latency;
-	omap3_power_states[OMAP3_STATE_C4].threshold =
-		cpuidle_params_table[OMAP3_STATE_C4].threshold;
-	omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF;
-	omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON;
-	omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C4].desc = "MPU OFF + CORE ON";
-
-	/* C5 . MPU CSWR + Core CSWR*/
-	omap3_power_states[OMAP3_STATE_C5].valid =
-		cpuidle_params_table[OMAP3_STATE_C5].valid;
-	omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5;
-	omap3_power_states[OMAP3_STATE_C5].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C5].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C5].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C5].wake_latency;
-	omap3_power_states[OMAP3_STATE_C5].threshold =
-		cpuidle_params_table[OMAP3_STATE_C5].threshold;
-	omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET;
-	omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET;
-	omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C5].desc = "MPU RET + CORE RET";
-
-	/* C6 . MPU OFF + Core CSWR */
-	omap3_power_states[OMAP3_STATE_C6].valid =
-		cpuidle_params_table[OMAP3_STATE_C6].valid;
-	omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6;
-	omap3_power_states[OMAP3_STATE_C6].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C6].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C6].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C6].wake_latency;
-	omap3_power_states[OMAP3_STATE_C6].threshold =
-		cpuidle_params_table[OMAP3_STATE_C6].threshold;
-	omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF;
-	omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET;
-	omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C6].desc = "MPU OFF + CORE RET";
-
-	/* C7 . MPU OFF + Core OFF */
-	omap3_power_states[OMAP3_STATE_C7].valid =
-		cpuidle_params_table[OMAP3_STATE_C7].valid;
-	omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7;
-	omap3_power_states[OMAP3_STATE_C7].sleep_latency =
-		cpuidle_params_table[OMAP3_STATE_C7].sleep_latency;
-	omap3_power_states[OMAP3_STATE_C7].wakeup_latency =
-		cpuidle_params_table[OMAP3_STATE_C7].wake_latency;
-	omap3_power_states[OMAP3_STATE_C7].threshold =
-		cpuidle_params_table[OMAP3_STATE_C7].threshold;
-	omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF;
-	omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF;
-	omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID |
-				CPUIDLE_FLAG_CHECK_BM;
-	omap3_power_states[OMAP3_STATE_C7].desc = "MPU OFF + CORE OFF";
-
-	/*
-	 * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
-	 * enable OFF mode in a stable form for previous revisions.
-	 * we disable C7 state as a result.
-	 */
-	if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) {
-		omap3_power_states[OMAP3_STATE_C7].valid = 0;
-		cpuidle_params_table[OMAP3_STATE_C7].valid = 0;
-		pr_warn("%s: core off state C7 disabled due to i583\n",
-			__func__);
-	}
-}
-
 struct cpuidle_driver omap3_idle_driver = {
 	.name = 	"omap3_idle",
 	.owner = 	THIS_MODULE,
 };
 
+/* Helper to fill the C-state common data and register the driver_data */
+static inline struct omap3_idle_statedata *_fill_cstate(
+					struct cpuidle_device *dev,
+					int idx, const char *descr)
+{
+	struct omap3_idle_statedata *cx = &omap3_idle_data[idx];
+	struct cpuidle_state *state = &dev->states[idx];
+
+	state->exit_latency = cpuidle_params_table[idx].exit_latency;
+	state->target_residency = cpuidle_params_table[idx].target_residency;
+	state->flags = CPUIDLE_FLAG_TIME_VALID;
+	state->enter = omap3_enter_idle_bm;
+	cx->valid = cpuidle_params_table[idx].valid;
+	sprintf(state->name, "C%d", idx + 1);
+	strncpy(state->desc, descr, CPUIDLE_DESC_LEN);
+	cpuidle_set_statedata(state, cx);
+
+	return cx;
+}
+
 /**
  * omap3_idle_init - Init routine for OMAP3 idle
  *
- * Registers the OMAP3 specific cpuidle driver with the cpuidle
+ * Registers the OMAP3 specific cpuidle driver to the cpuidle
  * framework with the valid set of states.
  */
 int __init omap3_idle_init(void)
 {
-	int i, count = 0;
-	struct omap3_processor_cx *cx;
-	struct cpuidle_state *state;
 	struct cpuidle_device *dev;
+	struct omap3_idle_statedata *cx;
 
 	mpu_pd = pwrdm_lookup("mpu_pwrdm");
 	core_pd = pwrdm_lookup("core_pwrdm");
 	per_pd = pwrdm_lookup("per_pwrdm");
 	cam_pd = pwrdm_lookup("cam_pwrdm");
 
-	omap_init_power_states();
 	cpuidle_register_driver(&omap3_idle_driver);
-
 	dev = &per_cpu(omap3_idle_dev, smp_processor_id());
 
-	for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) {
-		cx = &omap3_power_states[i];
-		state = &dev->states[count];
-
-		if (!cx->valid)
-			continue;
-		cpuidle_set_statedata(state, cx);
-		state->exit_latency = cx->sleep_latency + cx->wakeup_latency;
-		state->target_residency = cx->threshold;
-		state->flags = cx->flags;
-		state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ?
-			omap3_enter_idle_bm : omap3_enter_idle;
-		if (cx->type == OMAP3_STATE_C1)
-			dev->safe_state = state;
-		sprintf(state->name, "C%d", count+1);
-		strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
-		count++;
-	}
+	/* C1 . MPU WFI + Core active */
+	cx = _fill_cstate(dev, 0, "MPU ON + CORE ON");
+	(&dev->states[0])->enter = omap3_enter_idle;
+	dev->safe_state = &dev->states[0];
+	cx->valid = 1;	/* C1 is always valid */
+	cx->mpu_state = PWRDM_POWER_ON;
+	cx->core_state = PWRDM_POWER_ON;
 
-	if (!count)
-		return -EINVAL;
-	dev->state_count = count;
+	/* C2 . MPU WFI + Core inactive */
+	cx = _fill_cstate(dev, 1, "MPU ON + CORE ON");
+	cx->mpu_state = PWRDM_POWER_ON;
+	cx->core_state = PWRDM_POWER_ON;
+
+	/* C3 . MPU CSWR + Core inactive */
+	cx = _fill_cstate(dev, 2, "MPU RET + CORE ON");
+	cx->mpu_state = PWRDM_POWER_RET;
+	cx->core_state = PWRDM_POWER_ON;
 
-	if (enable_off_mode)
-		omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF);
-	else
-		omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET);
+	/* C4 . MPU OFF + Core inactive */
+	cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON");
+	cx->mpu_state = PWRDM_POWER_OFF;
+	cx->core_state = PWRDM_POWER_ON;
+
+	/* C5 . MPU RET + Core RET */
+	cx = _fill_cstate(dev, 4, "MPU RET + CORE RET");
+	cx->mpu_state = PWRDM_POWER_RET;
+	cx->core_state = PWRDM_POWER_RET;
+
+	/* C6 . MPU OFF + Core RET */
+	cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET");
+	cx->mpu_state = PWRDM_POWER_OFF;
+	cx->core_state = PWRDM_POWER_RET;
+
+	/* C7 . MPU OFF + Core OFF */
+	cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF");
+	/*
+	 * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
+	 * enable OFF mode in a stable form for previous revisions.
+	 * We disable C7 state as a result.
+	 */
+	if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) {
+		cx->valid = 0;
+		pr_warn("%s: core off state C7 disabled due to i583\n",
+			__func__);
+	}
+	cx->mpu_state = PWRDM_POWER_OFF;
+	cx->core_state = PWRDM_POWER_OFF;
 
+	dev->state_count = OMAP3_NUM_STATES;
 	if (cpuidle_register_device(dev)) {
 		printk(KERN_ERR "%s: CPUidle register device failed\n",
 		       __func__);
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index 797bfd12b643..45bcfce77352 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -36,11 +36,16 @@ static inline int omap4_opp_init(void)
 }
 #endif
 
+/*
+ * cpuidle mach specific parameters
+ *
+ * The board code can override the default C-states definition using
+ * omap3_pm_init_cpuidle
+ */
 struct cpuidle_params {
-	u8 valid;
-	u32 sleep_latency;
-	u32 wake_latency;
-	u32 threshold;
+	u32 exit_latency;	/* exit_latency = sleep + wake-up latencies */
+	u32 target_residency;
+	u8 valid;		/* validates the C-state */
 };
 
 #if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE)
@@ -73,10 +78,6 @@ extern u32 sleep_while_idle;
 #define sleep_while_idle 0
 #endif
 
-#if defined(CONFIG_CPU_IDLE)
-extern void omap3_cpuidle_update_states(u32, u32);
-#endif
-
 #if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS)
 extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev);
 extern int pm_dbg_regset_save(int reg_set);
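With the reordered cpuidle_params layout above, a board file supplies {exit_latency, target_residency, valid} per C-state and passes the table to omap3_pm_init_cpuidle(), as board-rx51.c does earlier in this diff. A minimal sketch, assuming a hypothetical board file and reusing the default OMAP3 latencies from cpuidle34xx.c (all seven states must be provided):

	/* Sketch only: hypothetical board override of the seven OMAP3 C-states.
	 * Each entry is {exit_latency (sleep + wake-up), target_residency, valid}. */
	static struct cpuidle_params my_board_cpuidle_params[] = {
		{2 + 2,         5,      1},	/* C1 */
		{10 + 10,       30,     1},	/* C2 */
		{50 + 50,       300,    1},	/* C3 */
		{1500 + 1800,   4000,   1},	/* C4 */
		{2500 + 7500,   12000,  1},	/* C5 */
		{3000 + 8500,   15000,  1},	/* C6 */
		{10000 + 30000, 300000, 1},	/* C7 */
	};

	static void __init my_board_init(void)
	{
		omap3_pm_init_cpuidle(my_board_cpuidle_params);
	}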
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 0c5e3a46a3ad..c155c9d1c82c 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -779,18 +779,6 @@ void omap3_pm_off_mode_enable(int enable)
 	else
 		state = PWRDM_POWER_RET;
 
-#ifdef CONFIG_CPU_IDLE
-	/*
-	 * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot
-	 * enable OFF mode in a stable form for previous revisions, restrict
-	 * instead to RET
-	 */
-	if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583))
-		omap3_cpuidle_update_states(state, PWRDM_POWER_RET);
-	else
-		omap3_cpuidle_update_states(state, state);
-#endif
-
 	list_for_each_entry(pwrst, &pwrst_list, node) {
 		if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) &&
 		    pwrst->pwrdm == core_pwrdm &&
@@ -895,8 +883,6 @@ static int __init omap3_pm_init(void)
 
 	pm_errata_configure();
 
-	printk(KERN_ERR "Power Management for TI OMAP3.\n");
-
 	/* XXX prcm_setup_regs needs to be before enabling hw
 	 * supervised mode for powerdomains */
 	prcm_setup_regs();
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 425dfa5d6e12..bb571bcdb8f2 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
 		size = 0x1f;
 	}
 
-	gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
-					AR7_REGS_GPIO + 0x10);
-
+	gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
 	if (!gpch->regs) {
 		printk(KERN_ERR "%s: failed to ioremap regs\n",
 					gpch->chip.label);
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 655f849bd08d..7aa37ddfca4b 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -5,7 +5,9 @@
 #include <asm/cache.h>
 #include <asm-generic/dma-coherent.h>
 
+#ifndef CONFIG_SGI_IP27	/* Kludge to fix 2.6.39 build for IP27 */
 #include <dma-coherence.h>
+#endif
 
 extern struct dma_map_ops *mips_dma_map_ops;
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d88..e9b3af27d844 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	unsigned long dvpret = dvpe();
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+		sig = 0;
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	mips_mt_regdump(dvpret);
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
-		sig = 0;
-
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	add_taint(TAINT_DIE);
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index 37de05d595e7..6c47dfeb7be3 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void)
 	struct resource *r;
 
 	r = rb532_gpio_reg0_res;
-	rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start);
+	rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
 
 	if (!rb532_gpio_chip->regbase) {
 		printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
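The fix above matters because struct resource ranges are inclusive, so r->end - r->start is one byte short; resource_size() (the helper in include/linux/ioport.h, if memory serves) folds in the +1. An equivalent sketch of what the helper evaluates to:

	/* What resource_size() computes for an inclusive [start, end] range. */
	static inline resource_size_t my_resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;
	}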
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 188272934cfb..104faa8aa23c 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
 	.end = mpc83xx_suspend_end,
 };
 
+static struct of_device_id pmc_match[];
 static int pmc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource res;
 	struct pmc_type *type;
 	int ret = 0;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(pmc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
-	type = ofdev->dev.of_match->data;
+	type = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20f..01cd2f089512 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
 	return 0;
 }
 
+static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
+	const struct of_device_id *match;
 	struct fsl_msi *msi;
 	struct resource res;
 	int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 	u32 offset;
 	static const u32 all_avail[] = { 0, NR_MSI_IRQS };
 
-	if (!dev->dev.of_match)
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	features = dev->dev.of_match->data;
+	features = match->data;
 
 	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
 
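This hunk, like the 83xx suspend, sabre and schizo probes elsewhere in this merge, replaces the removed dev->dev.of_match shortcut with an explicit of_match_device() lookup. A minimal sketch of the pattern with hypothetical names (my_ids, my_probe are not part of this merge):

	/* Sketch only: generic form of the of_match_device() probe pattern. */
	static const struct of_device_id my_ids[];	/* forward declaration, table defined below the probe */

	static int my_probe(struct platform_device *pdev)
	{
		const struct of_device_id *match;

		match = of_match_device(my_ids, &pdev->dev);
		if (!match)
			return -EINVAL;
		/* match->data now carries what of_match->data used to */
		return 0;
	}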
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index f679c57644d5..1e34f29e58bb 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata apc_match[] = {
+static struct of_device_id apc_match[] = {
 	{
 		.name = APC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 948068a083fc..d1840dbdaa2f 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
 	sabre_scan_bus(pbm, &op->dev);
 }
 
+static const struct of_device_id sabre_match[];
 static int __devinit sabre_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct linux_prom64_registers *pr_regs;
 	struct device_node *dp = op->dev.of_node;
 	struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
 	const u32 *vdma;
 	u64 clear_irq;
 
-	hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+	match = of_match_device(sabre_match, &op->dev);
+	hummingbird_p = match && (match->data != NULL);
 	if (!hummingbird_p) {
 		struct device_node *cpu_dp;
 
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index fecfcb2063c8..283fbc329a43 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1458,11 +1458,15 @@ out_err:
 	return err;
 }
 
+static const struct of_device_id schizo_match[];
 static int __devinit schizo_probe(struct platform_device *op)
 {
-	if (!op->dev.of_match)
+	const struct of_device_id *match;
+
+	match = of_match_device(schizo_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+	return __schizo_init(op, (unsigned long)match->data);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 93d7b4465f8d..6a585d393580 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata pmc_match[] = {
+static struct of_device_id pmc_match[] = {
 	{
 		.name = PMC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb70858..850a1360c0d6 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 void __cpuinit smp_store_cpu_info(int id)
 {
 	int cpu_node;
+	int mid;
 
 	cpu_data(id).udelay_val = loops_per_jiffy;
 
@@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 	cpu_data(id).prom_node = cpu_node;
-	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+	mid = cpu_get_hwmid(cpu_node);
 
-	if (cpu_data(id).mid < 0)
-		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+	if (mid < 0) {
+		printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node);
+		mid = 0;
+	}
+	cpu_data(id).mid = mid;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 4e236391b635..96046a4024c2 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata clock_match[] = {
+static struct of_device_id clock_match[] = {
 	{
 		.name = "eeprom",
 	},
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 3632cb34e914..0084c3361e15 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -289,10 +289,16 @@ cc_end_cruft:
 
 	/* Also, handle the alignment code out of band. */
 cc_dword_align:
-	cmp	%g1, 6
-	bl,a	ccte
+	cmp	%g1, 16
+	bge	1f
+	 srl	%g1, 1, %o3
+2:	cmp	%o3, 0
+	be,a	ccte
 	 andcc	%g1, 0xf, %o3
-	andcc	%o0, 0x1, %g0
+	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
+	be,a	2b
+	 srl	%o3, 1, %o3
+1:	andcc	%o0, 0x1, %g0
 	bne	ccslow
 	andcc	%o0, 0x2, %g0
 	be	1f
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 6ea77979531c..42827cafa6af 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -5,6 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <string.h>
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len)
 		 host.release, host.version, host.machine);
 }
 
+/*
+ * We cannot use glibc's abort(). It makes use of tgkill() which
+ * has no effect within UML's kernel threads.
+ * After that glibc would execute an invalid instruction to kill
+ * the calling process and UML crashes with SIGSEGV.
+ */
+static inline void __attribute__ ((noreturn)) uml_abort(void)
+{
+	sigset_t sig;
+
+	fflush(NULL);
+
+	if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT))
+		sigprocmask(SIG_UNBLOCK, &sig, 0);
+
+	for (;;)
+		if (kill(getpid(), SIGABRT) < 0)
+			exit(127);
+}
+
 void os_dump_core(void)
 {
 	int pid;
@@ -116,5 +137,5 @@ void os_dump_core(void)
 	while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0)
 		os_kill_ptraced_process(pid, 0);
 
-	abort();
+	uml_abort();
 }
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define	APIC_DEST_LOGICAL	0x00800
 #define	APIC_DEST_PHYSICAL	0x00000
 #define	APIC_DM_FIXED		0x00000
+#define	APIC_DM_FIXED_MASK	0x00700
 #define	APIC_DM_LOWEST		0x00100
 #define	APIC_DM_SMI		0x00200
 #define	APIC_DM_REMRD		0x00300
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f32..d56187c6b838 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
  */
@@ -124,7 +126,7 @@
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short	uvhub;
+	short	pnode;
+};
 /*
  * one per-cpu; to locate the software tables
  */
@@ -399,10 +405,12 @@ struct bau_control {
 	int baudisabled;
 	int set_bau_off;
 	short cpu;
+	short osnode;
 	short uvhub_cpu;
 	short uvhub;
 	short cpus_in_socket;
 	short cpus_in_uvhub;
+	short partition_base_pnode;
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
 	int congested_period;
 	cycles_t period_time;
 	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
 				    int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                                UVH_SCRATCH5                               */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad8..d3d859035af9 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
 };
 
 /**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_setup_start: platform specific pre paging_init() call
  * @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
123 struct x86_init_mpparse mpparse; 134 struct x86_init_mpparse mpparse;
124 struct x86_init_irqs irqs; 135 struct x86_init_irqs irqs;
125 struct x86_init_oem oem; 136 struct x86_init_oem oem;
137 struct x86_init_mapping mapping;
126 struct x86_init_paging paging; 138 struct x86_init_paging paging;
127 struct x86_init_timers timers; 139 struct x86_init_timers timers;
128 struct x86_init_iommu iommu; 140 struct x86_init_iommu iommu;
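
A minimal sketch of how the new mapping hook is meant to be used (the example_* names are hypothetical; the real override installed by this series is the Xen one at the end of the diff). A platform keeps the native reservation and adds its own post-processing:

#include <asm/x86_init.h>

/* hypothetical override; native_pagetable_reserve() is added elsewhere
 * in this series as the default implementation */
static void __init example_pagetable_reserve(u64 start, u64 end)
{
	/* keep the default: memblock-reserve the used pagetable pages */
	native_pagetable_reserve(start, end);

	/* platform-specific handling of [end, pgt_buf_top) would go here */
}

static void __init example_install_mapping_hook(void)
{
	x86_init.mapping.pagetable_reserve = example_pagetable_reserve;
}
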
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
37#include <asm/smp.h> 37#include <asm/smp.h>
38#include <asm/x86_init.h> 38#include <asm/x86_init.h>
39#include <asm/emergency-restart.h> 39#include <asm/emergency-restart.h>
40#include <asm/nmi.h>
41
42/* BMC sets a bit this MMR non-zero before sending an NMI */
43#define UVH_NMI_MMR UVH_SCRATCH5
44#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
45#define UV_NMI_PENDING_MASK (1UL << 63)
46DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
40 47
41DEFINE_PER_CPU(int, x2apic_extra_bits); 48DEFINE_PER_CPU(int, x2apic_extra_bits);
42 49
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
642 */ 649 */
643int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 650int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
644{ 651{
652 unsigned long real_uv_nmi;
653 int bid;
654
645 if (reason != DIE_NMIUNKNOWN) 655 if (reason != DIE_NMIUNKNOWN)
646 return NOTIFY_OK; 656 return NOTIFY_OK;
647 657
648 if (in_crash_kexec) 658 if (in_crash_kexec)
649 /* do nothing if entering the crash kernel */ 659 /* do nothing if entering the crash kernel */
650 return NOTIFY_OK; 660 return NOTIFY_OK;
661
651 /* 662 /*
652 * Use a lock so only one cpu prints at a time 663 * Each blade has an MMR that indicates when an NMI has been sent
653 * to prevent intermixed output. 664 * to cpus on the blade. If an NMI is detected, atomically
665 * clear the MMR and update a per-blade NMI count used to
666 * cause each cpu on the blade to notice a new NMI.
667 */
668 bid = uv_numa_blade_id();
669 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
670
671 if (unlikely(real_uv_nmi)) {
672 spin_lock(&uv_blade_info[bid].nmi_lock);
673 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
674 if (real_uv_nmi) {
675 uv_blade_info[bid].nmi_count++;
676 uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
677 }
678 spin_unlock(&uv_blade_info[bid].nmi_lock);
679 }
680
681 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
682 return NOTIFY_DONE;
683
684 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
685
686 /*
687 * Use a lock so only one cpu prints at a time.
688 * This prevents intermixed output.
654 */ 689 */
655 spin_lock(&uv_nmi_lock); 690 spin_lock(&uv_nmi_lock);
656 pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); 691 pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
657 dump_stack(); 692 dump_stack();
658 spin_unlock(&uv_nmi_lock); 693 spin_unlock(&uv_nmi_lock);
659 694
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
661} 696}
662 697
663static struct notifier_block uv_dump_stack_nmi_nb = { 698static struct notifier_block uv_dump_stack_nmi_nb = {
664 .notifier_call = uv_handle_nmi 699 .notifier_call = uv_handle_nmi,
700 .priority = NMI_LOCAL_LOW_PRIOR - 1,
665}; 701};
666 702
667void uv_register_nmi_notifier(void) 703void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
720 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 756 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
721 757
722 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 758 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
723 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 759 uv_blade_info = kzalloc(bytes, GFP_KERNEL);
724 BUG_ON(!uv_blade_info); 760 BUG_ON(!uv_blade_info);
761
725 for (blade = 0; blade < uv_num_possible_blades(); blade++) 762 for (blade = 0; blade < uv_num_possible_blades(); blade++)
726 uv_blade_info[blade].memory_nid = -1; 763 uv_blade_info[blade].memory_nid = -1;
727 764
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
747 uv_blade_info[blade].pnode = pnode; 784 uv_blade_info[blade].pnode = pnode;
748 uv_blade_info[blade].nr_possible_cpus = 0; 785 uv_blade_info[blade].nr_possible_cpus = 0;
749 uv_blade_info[blade].nr_online_cpus = 0; 786 uv_blade_info[blade].nr_online_cpus = 0;
787 spin_lock_init(&uv_blade_info[blade].nmi_lock);
750 max_pnode = max(pnode, max_pnode); 788 max_pnode = max(pnode, max_pnode);
751 blade++; 789 blade++;
752 } 790 }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52dd..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
613#endif 613#endif
614 614
615 /* As a rule processors have APIC timer running in deep C states */ 615 /* As a rule processors have APIC timer running in deep C states */
616 if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) 616 if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
617 set_cpu_cap(c, X86_FEATURE_ARAT); 617 set_cpu_cap(c, X86_FEATURE_ARAT);
618 618
619 /* 619 /*
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
698 */ 698 */
699 699
700const int amd_erratum_400[] = 700const int amd_erratum_400[] =
701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), 701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); 702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
703EXPORT_SYMBOL_GPL(amd_erratum_400); 703EXPORT_SYMBOL_GPL(amd_erratum_400);
704 704
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
509out_free: 509out_free:
510 if (b) { 510 if (b) {
511 kobject_put(&b->kobj); 511 kobject_put(&b->kobj);
512 list_del(&b->miscj);
512 kfree(b); 513 kfree(b);
513 } 514 }
514 return err; 515 return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..0f034460260d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
446 */ 446 */
447 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 447 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
448 448
449 h = lvtthmr_init;
449 /* 450 /*
450 * The initial value of thermal LVT entries on all APs always reads 451 * The initial value of thermal LVT entries on all APs always reads
451 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI 452 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
452 * sequence to them and LVT registers are reset to 0s except for 453 * sequence to them and LVT registers are reset to 0s except for
453 * the mask bits which are set to 1s when APs receive INIT IPI. 454 * the mask bits which are set to 1s when APs receive INIT IPI.
454 * Always restore the value that BIOS has programmed on AP based on 455 * If BIOS takes over the thermal interrupt and sets its interrupt
455 * BSP's info we saved since BIOS is always setting the same value 456 * delivery mode to SMI (not fixed), it restores the value that the
456 * for all threads/cores 457 * BIOS has programmed on AP based on BSP's info we saved since BIOS
458 * is always setting the same value for all threads/cores.
457 */ 459 */
458 apic_write(APIC_LVTTHMR, lvtthmr_init); 460 if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
461 apic_write(APIC_LVTTHMR, lvtthmr_init);
459 462
460 h = lvtthmr_init;
461 463
462 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 464 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
463 printk(KERN_DEBUG 465 printk(KERN_DEBUG
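
In other words, the hunk above stops unconditionally rewriting APIC_LVTTHMR on APs: the saved BIOS value is written back only when its delivery-mode field, bits 8-10 of the LVT entry (mask 0x700), is something other than Fixed, i.e. when BIOS has claimed the thermal interrupt. Worked values for the check, restated from the patch:

	/* h holds the BSP's saved LVTTHMR value (lvtthmr_init):
	 *   SMI delivery   -> (h & APIC_DM_FIXED_MASK) == APIC_DM_SMI, restore
	 *   Fixed delivery -> (h & APIC_DM_FIXED_MASK) == 0, skip the write */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);
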
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d1566..f1a6244d7d93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1183 struct pt_regs *regs) 1183 struct pt_regs *regs)
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 unsigned long flags;
1186 1187
1187 /* This is possible if op is under delayed unoptimizing */ 1188 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp)) 1189 if (kprobe_disabled(&op->kp))
1189 return; 1190 return;
1190 1191
1191 preempt_disable(); 1192 local_irq_save(flags);
1192 if (kprobe_running()) { 1193 if (kprobe_running()) {
1193 kprobes_inc_nmissed_count(&op->kp); 1194 kprobes_inc_nmissed_count(&op->kp);
1194 } else { 1195 } else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1207 opt_pre_handler(&op->kp, regs); 1208 opt_pre_handler(&op->kp, regs);
1208 __this_cpu_write(current_kprobe, NULL); 1209 __this_cpu_write(current_kprobe, NULL);
1209 } 1210 }
1210 preempt_enable_no_resched(); 1211 local_irq_restore(flags);
1211} 1212}
1212 1213
1213static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) 1214static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
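
Reading of the kprobes hunk above (hedged, since the diff carries no changelog here): the jump-optimized callback switches from preempt_disable()/preempt_enable_no_resched() to local_irq_save()/local_irq_restore(), so the per-cpu kprobe bookkeeping (current_kprobe, the missed count) can no longer race with a kprobe fired from an interrupt arriving mid-callback; with interrupts off, preemption on this cpu is excluded as well. The generic idiom, for reference:

	unsigned long flags;

	local_irq_save(flags);		/* irqs off; no preemption on this cpu */
	/* ... work that touches per-cpu kprobe state ... */
	local_irq_restore(flags);
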
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128b..75ef4b18e9b7 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
61 .banner = default_banner, 61 .banner = default_banner,
62 }, 62 },
63 63
64 .mapping = {
65 .pagetable_reserve = native_pagetable_reserve,
66 },
67
64 .paging = { 68 .paging = {
65 .pagetable_setup_start = native_pagetable_setup_start, 69 .pagetable_setup_start = native_pagetable_setup_start,
66 .pagetable_setup_done = native_pagetable_setup_done, 70 .pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039b..37b8b0fe8320 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
81 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); 81 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
82} 82}
83 83
84void __init native_pagetable_reserve(u64 start, u64 end)
85{
86 memblock_x86_reserve_range(start, end, "PGTABLE");
87}
88
84struct map_range { 89struct map_range {
85 unsigned long start; 90 unsigned long start;
86 unsigned long end; 91 unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
272 277
273 __flush_tlb_all(); 278 __flush_tlb_all();
274 279
280 /*
281 * Reserve the kernel pagetable pages we used (pgt_buf_start -
282 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
283 * so that they can be reused for other purposes.
284 *
285 * On native it just means calling memblock_x86_reserve_range, on Xen it
286 * also means marking RW the pagetable pages that we allocated before
287 * but that haven't been used.
288 *
289 * In fact on xen we mark RO the whole range pgt_buf_start -
290 * pgt_buf_top, because we have to make sure that when
291 * init_memory_mapping reaches the pagetable pages area, it maps
292 * RO all the pagetable pages, including the ones that are beyond
293 * pgt_buf_end at that time.
294 */
275 if (!after_bootmem && pgt_buf_end > pgt_buf_start) 295 if (!after_bootmem && pgt_buf_end > pgt_buf_start)
276 memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, 296 x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
277 pgt_buf_end << PAGE_SHIFT, "PGTABLE"); 297 PFN_PHYS(pgt_buf_end));
278 298
279 if (!after_bootmem) 299 if (!after_bootmem)
280 early_memtest(start, end); 300 early_memtest(start, end);
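
Worked example of the new call above, with hypothetical pfn values pgt_buf_start = 0x100, pgt_buf_end = 0x103, pgt_buf_top = 0x110 (PFN_PHYS() is just pfn << PAGE_SHIFT, so the call is byte-for-byte equivalent to the shifts it replaces):

	x86_init.mapping.pagetable_reserve(PFN_PHYS(0x100), PFN_PHYS(0x103));
	/* native: memblock-reserves pfns 0x100-0x102, leaves 0x103-0x10f free;
	 * xen:    does the same reservation, then flips pfns 0x103-0x10f back
	 *         to RW, because it had mapped the whole 0x100-0x10f range RO */
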
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
699 struct mm_struct *mm, 699 struct mm_struct *mm,
700 unsigned long va, unsigned int cpu) 700 unsigned long va, unsigned int cpu)
701{ 701{
702 int tcpu;
703 int uvhub;
704 int locals = 0; 702 int locals = 0;
705 int remotes = 0; 703 int remotes = 0;
706 int hubs = 0; 704 int hubs = 0;
705 int tcpu;
706 int tpnode;
707 struct bau_desc *bau_desc; 707 struct bau_desc *bau_desc;
708 struct cpumask *flush_mask; 708 struct cpumask *flush_mask;
709 struct ptc_stats *stat; 709 struct ptc_stats *stat;
710 struct bau_control *bcp; 710 struct bau_control *bcp;
711 struct bau_control *tbcp; 711 struct bau_control *tbcp;
712 struct hub_and_pnode *hpp;
712 713
713 /* kernel was booted 'nobau' */ 714 /* kernel was booted 'nobau' */
714 if (nobau) 715 if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
750 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; 751 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
751 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 752 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
752 753
753 /* cpu statistics */
754 for_each_cpu(tcpu, flush_mask) { 754 for_each_cpu(tcpu, flush_mask) {
755 uvhub = uv_cpu_to_blade_id(tcpu); 755 /*
756 bau_uvhub_set(uvhub, &bau_desc->distribution); 756 * The distribution vector is a bit map of pnodes, relative
757 if (uvhub == bcp->uvhub) 757 * to the partition base pnode (and the partition base nasid
758 * in the header).
759 * Translate cpu to pnode and hub using an array stored
760 * in local memory.
761 */
762 hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
763 tpnode = hpp->pnode - bcp->partition_base_pnode;
764 bau_uvhub_set(tpnode, &bau_desc->distribution);
765 if (hpp->uvhub == bcp->uvhub)
758 locals++; 766 locals++;
759 else 767 else
760 remotes++; 768 remotes++;
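
Worked example of the translation described in the comment above (hypothetical numbers): with bcp->partition_base_pnode = 4 and a target cpu whose hub_and_pnode entry reports pnode 6, the relative pnode is 2 and bit 2 of the distribution map gets set:

	hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
	tpnode = hpp->pnode - bcp->partition_base_pnode;	/* 6 - 4 = 2 */
	bau_uvhub_set(tpnode, &bau_desc->distribution);		/* sets bit 2 */
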
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
855 * an interrupt, but causes an error message to be returned to 863 * an interrupt, but causes an error message to be returned to
856 * the sender. 864 * the sender.
857 */ 865 */
858static void uv_enable_timeouts(void) 866static void __init uv_enable_timeouts(void)
859{ 867{
860 int uvhub; 868 int uvhub;
861 int nuvhubs; 869 int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
1326} 1334}
1327 1335
1328/* 1336/*
1329 * initialize the sending side's sending buffers 1337 * Initialize the sending side's sending buffers.
1330 */ 1338 */
1331static void 1339static void
1332uv_activation_descriptor_init(int node, int pnode) 1340uv_activation_descriptor_init(int node, int pnode, int base_pnode)
1333{ 1341{
1334 int i; 1342 int i;
1335 int cpu; 1343 int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
1352 n = pa >> uv_nshift; 1360 n = pa >> uv_nshift;
1353 m = pa & uv_mmask; 1361 m = pa & uv_mmask;
1354 1362
1363 /* the 14-bit pnode */
1355 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 1364 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1356 (n << UV_DESC_BASE_PNODE_SHIFT | m)); 1365 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1357
1358 /* 1366 /*
1359 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 1367 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1360 * cpu even though we only use the first one; one descriptor can 1368 * cpu even though we only use the first one; one descriptor can
1361 * describe a broadcast to 256 uv hubs. 1369 * describe a broadcast to 256 uv hubs.
1362 */ 1370 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
1365 memset(bd2, 0, sizeof(struct bau_desc)); 1373 memset(bd2, 0, sizeof(struct bau_desc));
1366 bd2->header.sw_ack_flag = 1; 1374 bd2->header.sw_ack_flag = 1;
1367 /* 1375 /*
1368 * base_dest_nodeid is the nasid of the first uvhub 1376 * The base_dest_nasid set in the message header is the nasid
1369 * in the partition. The bit map will indicate uvhub numbers, 1377 * of the first uvhub in the partition. The bit map will
1370 * which are 0-N in a partition. Pnodes are unique system-wide. 1378 * indicate destination pnode numbers relative to that base.
1379 * They may not be consecutive if nasid striding is being used.
1371 */ 1380 */
1372 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); 1381 bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
1373 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1382 bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
1374 bd2->header.command = UV_NET_ENDPOINT_INTD; 1383 bd2->header.command = UV_NET_ENDPOINT_INTD;
1375 bd2->header.int_both = 1; 1384 bd2->header.int_both = 1;
1376 /* 1385 /*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
1442/* 1451/*
1443 * Initialization of each UV hub's structures 1452 * Initialization of each UV hub's structures
1444 */ 1453 */
1445static void __init uv_init_uvhub(int uvhub, int vector) 1454static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
1446{ 1455{
1447 int node; 1456 int node;
1448 int pnode; 1457 int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1450 1459
1451 node = uvhub_to_first_node(uvhub); 1460 node = uvhub_to_first_node(uvhub);
1452 pnode = uv_blade_to_pnode(uvhub); 1461 pnode = uv_blade_to_pnode(uvhub);
1453 uv_activation_descriptor_init(node, pnode); 1462 uv_activation_descriptor_init(node, pnode, base_pnode);
1454 uv_payload_queue_init(node, pnode); 1463 uv_payload_queue_init(node, pnode);
1455 /* 1464 /*
1456 * the below initialization can't be in firmware because the 1465 * The below initialization can't be in firmware because the
1457 * messaging IRQ will be determined by the OS 1466 * messaging IRQ will be determined by the OS.
1458 */ 1467 */
1459 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; 1468 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1460 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1469 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
1491/* 1500/*
1492 * initialize the bau_control structure for each cpu 1501 * initialize the bau_control structure for each cpu
1493 */ 1502 */
1494static int __init uv_init_per_cpu(int nuvhubs) 1503static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
1495{ 1504{
1496 int i; 1505 int i;
1497 int cpu; 1506 int cpu;
1507 int tcpu;
1498 int pnode; 1508 int pnode;
1499 int uvhub; 1509 int uvhub;
1500 int have_hmaster; 1510 int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
1528 bcp = &per_cpu(bau_control, cpu); 1538 bcp = &per_cpu(bau_control, cpu);
1529 memset(bcp, 0, sizeof(struct bau_control)); 1539 memset(bcp, 0, sizeof(struct bau_control));
1530 pnode = uv_cpu_hub_info(cpu)->pnode; 1540 pnode = uv_cpu_hub_info(cpu)->pnode;
1541 if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
1542 printk(KERN_EMERG
1543 "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
1544 cpu, pnode, base_part_pnode,
1545 UV_DISTRIBUTION_SIZE);
1546 return 1;
1547 }
1548 bcp->osnode = cpu_to_node(cpu);
1549 bcp->partition_base_pnode = uv_partition_base_pnode;
1531 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 1550 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1532 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); 1551 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1533 bdp = &uvhub_descs[uvhub]; 1552 bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
1536 bdp->pnode = pnode; 1555 bdp->pnode = pnode;
1537 /* kludge: 'assuming' one node per socket, and assuming that 1556 /* kludge: 'assuming' one node per socket, and assuming that
1538 disabling a socket just leaves a gap in node numbers */ 1557 disabling a socket just leaves a gap in node numbers */
1539 socket = (cpu_to_node(cpu) & 1); 1558 socket = bcp->osnode & 1;
1540 bdp->socket_mask |= (1 << socket); 1559 bdp->socket_mask |= (1 << socket);
1541 sdp = &bdp->socket[socket]; 1560 sdp = &bdp->socket[socket];
1542 sdp->cpu_number[sdp->num_cpus] = cpu; 1561 sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
1585nextsocket: 1604nextsocket:
1586 socket++; 1605 socket++;
1587 socket_mask = (socket_mask >> 1); 1606 socket_mask = (socket_mask >> 1);
1607 /* each socket gets a local array of pnodes/hubs */
1608 bcp = smaster;
1609 bcp->target_hub_and_pnode = kmalloc_node(
1610 sizeof(struct hub_and_pnode) *
1611 num_possible_cpus(), GFP_KERNEL, bcp->osnode);
1612 memset(bcp->target_hub_and_pnode, 0,
1613 sizeof(struct hub_and_pnode) *
1614 num_possible_cpus());
1615 for_each_present_cpu(tcpu) {
1616 bcp->target_hub_and_pnode[tcpu].pnode =
1617 uv_cpu_hub_info(tcpu)->pnode;
1618 bcp->target_hub_and_pnode[tcpu].uvhub =
1619 uv_cpu_hub_info(tcpu)->numa_blade_id;
1620 }
1588 } 1621 }
1589 } 1622 }
1590 kfree(uvhub_descs); 1623 kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
1637 spin_lock_init(&disable_lock); 1670 spin_lock_init(&disable_lock);
1638 congested_cycles = microsec_2_cycles(congested_response_us); 1671 congested_cycles = microsec_2_cycles(congested_response_us);
1639 1672
1640 if (uv_init_per_cpu(nuvhubs)) {
1641 nobau = 1;
1642 return 0;
1643 }
1644
1645 uv_partition_base_pnode = 0x7fffffff; 1673 uv_partition_base_pnode = 0x7fffffff;
1646 for (uvhub = 0; uvhub < nuvhubs; uvhub++) 1674 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1647 if (uv_blade_nr_possible_cpus(uvhub) && 1675 if (uv_blade_nr_possible_cpus(uvhub) &&
1648 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) 1676 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1649 uv_partition_base_pnode = uv_blade_to_pnode(uvhub); 1677 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1678 }
1679
1680 if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
1681 nobau = 1;
1682 return 0;
1683 }
1650 1684
1651 vector = UV_BAU_MESSAGE; 1685 vector = UV_BAU_MESSAGE;
1652 for_each_possible_blade(uvhub) 1686 for_each_possible_blade(uvhub)
1653 if (uv_blade_nr_possible_cpus(uvhub)) 1687 if (uv_blade_nr_possible_cpus(uvhub))
1654 uv_init_uvhub(uvhub, vector); 1688 uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
1655 1689
1656 uv_enable_timeouts(); 1690 uv_enable_timeouts();
1657 alloc_intr_gate(vector, uv_bau_message_intr1); 1691 alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 55c965b38c27..0684f3c74d53 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
1275{ 1275{
1276} 1276}
1277 1277
1278static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1279{
1280 /* reserve the range used */
1281 native_pagetable_reserve(start, end);
1282
1283 /* set as RW the rest */
1284 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
1285 PFN_PHYS(pgt_buf_top));
1286 while (end < PFN_PHYS(pgt_buf_top)) {
1287 make_lowmem_page_readwrite(__va(end));
1288 end += PAGE_SIZE;
1289 }
1290}
1291
1278static void xen_post_allocator_init(void); 1292static void xen_post_allocator_init(void);
1279 1293
1280static __init void xen_pagetable_setup_done(pgd_t *base) 1294static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
1463 return ret; 1477 return ret;
1464} 1478}
1465 1479
1466#ifdef CONFIG_X86_64
1467static __initdata u64 __last_pgt_set_rw = 0;
1468static __initdata u64 __pgt_buf_start = 0;
1469static __initdata u64 __pgt_buf_end = 0;
1470static __initdata u64 __pgt_buf_top = 0;
1471/*
1472 * As a consequence of the commit:
1473 *
1474 * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
1475 * Author: Yinghai Lu <yinghai@kernel.org>
1476 * Date: Fri Dec 17 16:58:28 2010 -0800
1477 *
1478 * x86-64, mm: Put early page table high
1479 *
1480 * at some point init_memory_mapping is going to reach the pagetable pages
1481 * area and map those pages too (mapping them as normal memory that falls
1482 * in the range of addresses passed to init_memory_mapping as argument).
1483 * Some of those pages are already pagetable pages (they are in the range
1484 * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
1485 * everything is fine.
1486 * Some of these pages are not pagetable pages yet (they fall in the range
1487 * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
1488 * are going to be mapped RW. When these pages become pagetable pages and
1489 * are hooked into the pagetable, xen will find that the guest has already
1490 * a RW mapping of them somewhere and fail the operation.
1491 * The reason Xen requires pagetables to be RO is that the hypervisor needs
1492 * to verify that the pagetables are valid before using them. The validation
1493 * operations are called "pinning".
1494 *
1495 * In order to fix the issue we mark all the pages in the entire range
1496 * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
1497 * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
1498 * init_memory_mapping. Hence the kernel is going to crash as soon as one
1499 * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
1500 * ranges are RO).
1501 *
1502 * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
1503 * the init_memory_mapping has completed (in a perfect world we would
1504 * call this function from init_memory_mapping, but lets ignore that).
1505 *
1506 * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
1507 * end,top] have all changed to new values (b/c init_memory_mapping
1508 * is called and setting up another new page-table). Hence, the first time
1509 * we enter this function, we save away the pgt_buf_start value and update
1510 * the pgt_buf_[end,top].
1511 *
1512 * When we detect that the "old" pgt_buf_start through pgt_buf_end
1513 * PFNs have been reserved (so memblock_x86_reserve_range has been called),
1514 * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
1515 *
1516 * And then we update those "old" pgt_buf_[end|top] with the new ones
1517 * so that we can redo this on the next pagetable.
1518 */
1519static __init void mark_rw_past_pgt(void) {
1520
1521 if (pgt_buf_end > pgt_buf_start) {
1522 u64 addr, size;
1523
1524 /* Save it away. */
1525 if (!__pgt_buf_start) {
1526 __pgt_buf_start = pgt_buf_start;
1527 __pgt_buf_end = pgt_buf_end;
1528 __pgt_buf_top = pgt_buf_top;
1529 return;
1530 }
1531 /* If we get the range that starts at __pgt_buf_end that means
1532 * the range is reserved, and that in 'init_memory_mapping'
1533 * the 'memblock_x86_reserve_range' has been called with the
1534 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
1535 * pgt_buf_[start|end|top] refer now to a new pagetable.
1536 * Note: we are called _after_ the pgt_buf_[..] have been
1537 * updated.*/
1538
1539 addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
1540 &size, PAGE_SIZE);
1541
1542 /* Still not reserved, meaning 'memblock_x86_reserve_range'
1543 * hasn't been called yet. Update the _end and _top.*/
1544 if (addr == PFN_PHYS(__pgt_buf_start)) {
1545 __pgt_buf_end = pgt_buf_end;
1546 __pgt_buf_top = pgt_buf_top;
1547 return;
1548 }
1549
1550 /* OK, the area is reserved, meaning it is time for us to
1551 * set RW for the old end->top PFNs. */
1552
1553 /* ..unless we had already done this. */
1554 if (__pgt_buf_end == __last_pgt_set_rw)
1555 return;
1556
1557 addr = PFN_PHYS(__pgt_buf_end);
1558
1559 /* set as RW the rest */
1560 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
1561 PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
1562
1563 while (addr < PFN_PHYS(__pgt_buf_top)) {
1564 make_lowmem_page_readwrite(__va(addr));
1565 addr += PAGE_SIZE;
1566 }
1567 /* And update everything so that we are ready for the next
1568 * pagetable (the one created for regions past 4GB) */
1569 __last_pgt_set_rw = __pgt_buf_end;
1570 __pgt_buf_start = pgt_buf_start;
1571 __pgt_buf_end = pgt_buf_end;
1572 __pgt_buf_top = pgt_buf_top;
1573 }
1574 return;
1575}
1576#else
1577static __init void mark_rw_past_pgt(void) { }
1578#endif
1579static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) 1480static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1580{ 1481{
1581#ifdef CONFIG_X86_64 1482#ifdef CONFIG_X86_64
@@ -1602,14 +1503,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1602 unsigned long pfn = pte_pfn(pte); 1503 unsigned long pfn = pte_pfn(pte);
1603 1504
1604 /* 1505 /*
1605 * A bit of optimization. We do not need to call the workaround
1606 * when xen_set_pte_init is called with a PTE with 0 as PFN.
1607 * That is b/c the pagetable at that point are just being populated
1608 * with empty values and we can save some cycles by not calling
1609 * the 'memblock' code.*/
1610 if (pfn)
1611 mark_rw_past_pgt();
1612 /*
1613 * If the new pfn is within the range of the newly allocated 1506 * If the new pfn is within the range of the newly allocated
1614 * kernel pagetable, and it isn't being mapped into an 1507 * kernel pagetable, and it isn't being mapped into an
1615 * early_ioremap fixmap slot as a freshly allocated page, make sure 1508 * early_ioremap fixmap slot as a freshly allocated page, make sure
@@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void)
2118 2011
2119static __init void xen_post_allocator_init(void) 2012static __init void xen_post_allocator_init(void)
2120{ 2013{
2121 mark_rw_past_pgt();
2122
2123#ifdef CONFIG_XEN_DEBUG 2014#ifdef CONFIG_XEN_DEBUG
2124 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); 2015 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
2125#endif 2016#endif
@@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
2228 2119
2229void __init xen_init_mmu_ops(void) 2120void __init xen_init_mmu_ops(void)
2230{ 2121{
2122 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2231 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; 2123 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
2232 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; 2124 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2233 pv_mmu_ops = xen_mmu_ops; 2125 pv_mmu_ops = xen_mmu_ops;