author | Tony Lindgren <tony@atomide.com> | 2011-05-24 03:45:06 -0400
---|---|---
committer | Tony Lindgren <tony@atomide.com> | 2011-05-24 03:45:06 -0400
commit | 9b28b11e2a648f07c8481b9666ccf1c088e1ab74 (patch)
tree | ac3db2d4ae69e393d8423bb8c9304c75023dc805 /arch
parent | b7679ab3f70482ff4b75a8c735c8224ebedb6020 (diff)
parent | 99aa18278e867574d72201b806f82ace07d4804b (diff)
Merge branch 'for_2.6.40/pm-cleanup' of ssh://master.kernel.org/pub/scm/linux/kernel/git/khilman/linux-omap-pm into omap-for-linus
Diffstat (limited to 'arch')
37 files changed, 451 insertions(+), 543 deletions(-)
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 058937bf5a77..b1834166922d 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h | |||
@@ -452,10 +452,14 @@ | |||
452 | #define __NR_fanotify_init 494 | 452 | #define __NR_fanotify_init 494 |
453 | #define __NR_fanotify_mark 495 | 453 | #define __NR_fanotify_mark 495 |
454 | #define __NR_prlimit64 496 | 454 | #define __NR_prlimit64 496 |
455 | #define __NR_name_to_handle_at 497 | ||
456 | #define __NR_open_by_handle_at 498 | ||
457 | #define __NR_clock_adjtime 499 | ||
458 | #define __NR_syncfs 500 | ||
455 | 459 | ||
456 | #ifdef __KERNEL__ | 460 | #ifdef __KERNEL__ |
457 | 461 | ||
458 | #define NR_SYSCALLS 497 | 462 | #define NR_SYSCALLS 501 |
459 | 463 | ||
460 | #define __ARCH_WANT_IPC_PARSE_VERSION | 464 | #define __ARCH_WANT_IPC_PARSE_VERSION |
461 | #define __ARCH_WANT_OLD_READDIR | 465 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index a6a1de9db16f..15f999d41c75 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S | |||
@@ -498,23 +498,27 @@ sys_call_table: | |||
498 | .quad sys_ni_syscall /* sys_timerfd */ | 498 | .quad sys_ni_syscall /* sys_timerfd */ |
499 | .quad sys_eventfd | 499 | .quad sys_eventfd |
500 | .quad sys_recvmmsg | 500 | .quad sys_recvmmsg |
501 | .quad sys_fallocate /* 480 */ | 501 | .quad sys_fallocate /* 480 */ |
502 | .quad sys_timerfd_create | 502 | .quad sys_timerfd_create |
503 | .quad sys_timerfd_settime | 503 | .quad sys_timerfd_settime |
504 | .quad sys_timerfd_gettime | 504 | .quad sys_timerfd_gettime |
505 | .quad sys_signalfd4 | 505 | .quad sys_signalfd4 |
506 | .quad sys_eventfd2 /* 485 */ | 506 | .quad sys_eventfd2 /* 485 */ |
507 | .quad sys_epoll_create1 | 507 | .quad sys_epoll_create1 |
508 | .quad sys_dup3 | 508 | .quad sys_dup3 |
509 | .quad sys_pipe2 | 509 | .quad sys_pipe2 |
510 | .quad sys_inotify_init1 | 510 | .quad sys_inotify_init1 |
511 | .quad sys_preadv /* 490 */ | 511 | .quad sys_preadv /* 490 */ |
512 | .quad sys_pwritev | 512 | .quad sys_pwritev |
513 | .quad sys_rt_tgsigqueueinfo | 513 | .quad sys_rt_tgsigqueueinfo |
514 | .quad sys_perf_event_open | 514 | .quad sys_perf_event_open |
515 | .quad sys_fanotify_init | 515 | .quad sys_fanotify_init |
516 | .quad sys_fanotify_mark /* 495 */ | 516 | .quad sys_fanotify_mark /* 495 */ |
517 | .quad sys_prlimit64 | 517 | .quad sys_prlimit64 |
518 | .quad sys_name_to_handle_at | ||
519 | .quad sys_open_by_handle_at | ||
520 | .quad sys_clock_adjtime | ||
521 | .quad sys_syncfs /* 500 */ | ||
518 | 522 | ||
519 | .size sys_call_table, . - sys_call_table | 523 | .size sys_call_table, . - sys_call_table |
520 | .type sys_call_table, @object | 524 | .type sys_call_table, @object |
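The two alpha hunks above must move in lockstep: each new `__NR_*` number in unistd.h needs a matching `.quad` entry in systbls.S, and NR_SYSCALLS is bumped from 497 to 501 to cover the new top number. A hypothetical compile-time cross-check (not part of this merge) that captures the invariant:

```c
/*
 * Illustrative only: the highest allocated syscall number
 * (__NR_syncfs == 500) plus one must equal NR_SYSCALLS (501).
 */
#include <linux/bug.h>
#include <asm/unistd.h>

static void __init check_syscall_numbering(void)
{
	BUILD_BUG_ON(__NR_syncfs + 1 != NR_SYSCALLS);
}
```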
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 918e8e0b72ff..818e74ed45dc 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c | |||
@@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = { | |||
375 | 375 | ||
376 | static inline void register_rpcc_clocksource(long cycle_freq) | 376 | static inline void register_rpcc_clocksource(long cycle_freq) |
377 | { | 377 | { |
378 | clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4); | 378 | clocksource_register_hz(&clocksource_rpcc, cycle_freq); |
379 | clocksource_register(&clocksource_rpcc); | ||
380 | } | 379 | } |
381 | #else /* !CONFIG_SMP */ | 380 | #else /* !CONFIG_SMP */ |
382 | static inline void register_rpcc_clocksource(long cycle_freq) | 381 | static inline void register_rpcc_clocksource(long cycle_freq) |
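The time.c hunk drops the open-coded mult/shift computation in favor of `clocksource_register_hz()`, which lets the clocksource core derive the scaling factors from the counter frequency. A sketch of the before/after pattern, illustrative only:

```c
/* Old pattern: the driver picks mult/shift itself, then registers. */
clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
clocksource_register(&clocksource_rpcc);

/* New pattern: one call; the core computes mult/shift from Hz. */
clocksource_register_hz(&clocksource_rpcc, cycle_freq);
```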
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 99218a5299ca..52dbdf3ab66c 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c | |||
@@ -59,24 +59,6 @@ | |||
59 | 59 | ||
60 | #define TWL4030_MSECURE_GPIO 22 | 60 | #define TWL4030_MSECURE_GPIO 22 |
61 | 61 | ||
62 | /* FIXME: These values need to be updated based on more profiling on 3430sdp*/ | ||
63 | static struct cpuidle_params omap3_cpuidle_params_table[] = { | ||
64 | /* C1 */ | ||
65 | {1, 2, 2, 5}, | ||
66 | /* C2 */ | ||
67 | {1, 10, 10, 30}, | ||
68 | /* C3 */ | ||
69 | {1, 50, 50, 300}, | ||
70 | /* C4 */ | ||
71 | {1, 1500, 1800, 4000}, | ||
72 | /* C5 */ | ||
73 | {1, 2500, 7500, 12000}, | ||
74 | /* C6 */ | ||
75 | {1, 3000, 8500, 15000}, | ||
76 | /* C7 */ | ||
77 | {1, 10000, 30000, 300000}, | ||
78 | }; | ||
79 | |||
80 | static uint32_t board_keymap[] = { | 62 | static uint32_t board_keymap[] = { |
81 | KEY(0, 0, KEY_LEFT), | 63 | KEY(0, 0, KEY_LEFT), |
82 | KEY(0, 1, KEY_RIGHT), | 64 | KEY(0, 1, KEY_RIGHT), |
@@ -800,7 +782,6 @@ static void __init omap_3430sdp_init(void) | |||
800 | omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); | 782 | omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); |
801 | omap_board_config = sdp3430_config; | 783 | omap_board_config = sdp3430_config; |
802 | omap_board_config_size = ARRAY_SIZE(sdp3430_config); | 784 | omap_board_config_size = ARRAY_SIZE(sdp3430_config); |
803 | omap3_pm_init_cpuidle(omap3_cpuidle_params_table); | ||
804 | omap3430_i2c_init(); | 785 | omap3430_i2c_init(); |
805 | omap_display_init(&sdp3430_dss_data); | 786 | omap_display_init(&sdp3430_dss_data); |
806 | if (omap_rev() > OMAP3430_REV_ES1_0) | 787 | if (omap_rev() > OMAP3430_REV_ES1_0) |
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c index f8ba20a14e62..fec4cac8fa0a 100644 --- a/arch/arm/mach-omap2/board-rx51.c +++ b/arch/arm/mach-omap2/board-rx51.c | |||
@@ -58,21 +58,25 @@ static struct platform_device leds_gpio = { | |||
58 | }, | 58 | }, |
59 | }; | 59 | }; |
60 | 60 | ||
61 | /* | ||
62 | * cpuidle C-states definition override from the default values. | ||
63 | * The 'exit_latency' field is the sum of sleep and wake-up latencies. | ||
64 | */ | ||
61 | static struct cpuidle_params rx51_cpuidle_params[] = { | 65 | static struct cpuidle_params rx51_cpuidle_params[] = { |
62 | /* C1 */ | 66 | /* C1 */ |
63 | {1, 110, 162, 5}, | 67 | {110 + 162, 5, 1},
64 | /* C2 */ | 68 | /* C2 */ |
65 | {1, 106, 180, 309}, | 69 | {106 + 180, 309, 1}, |
66 | /* C3 */ | 70 | /* C3 */ |
67 | {0, 107, 410, 46057}, | 71 | {107 + 410, 46057, 0}, |
68 | /* C4 */ | 72 | /* C4 */ |
69 | {0, 121, 3374, 46057}, | 73 | {121 + 3374, 46057, 0}, |
70 | /* C5 */ | 74 | /* C5 */ |
71 | {1, 855, 1146, 46057}, | 75 | {855 + 1146, 46057, 1}, |
72 | /* C6 */ | 76 | /* C6 */ |
73 | {0, 7580, 4134, 484329}, | 77 | {7580 + 4134, 484329, 0}, |
74 | /* C7 */ | 78 | /* C7 */ |
75 | {1, 7505, 15274, 484329}, | 79 | {7505 + 15274, 484329, 1}, |
76 | }; | 80 | }; |
77 | 81 | ||
78 | static struct omap_lcd_config rx51_lcd_config = { | 82 | static struct omap_lcd_config rx51_lcd_config = { |
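The rx51 table is converted mechanically to the new layout defined in pm.h later in this series: the old `{valid, sleep_latency, wake_latency, threshold}` entries become `{exit_latency, target_residency, valid}`, with `exit_latency` written as the sum of the old sleep and wake-up latencies. How the C1 entry maps, shown with designated initializers (the variable name is illustrative):

```c
/* Old four-field form for C1: {1, 110, 162, 5} */
static const struct cpuidle_params rx51_c1 = {
	.exit_latency     = 110 + 162,	/* sleep + wake-up latency */
	.target_residency = 5,
	.valid            = 1,
};
```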
diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 1c240eff3918..4bf6e6e8b100 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c | |||
@@ -36,36 +36,6 @@ | |||
36 | 36 | ||
37 | #ifdef CONFIG_CPU_IDLE | 37 | #ifdef CONFIG_CPU_IDLE |
38 | 38 | ||
39 | #define OMAP3_MAX_STATES 7 | ||
40 | #define OMAP3_STATE_C1 0 /* C1 - MPU WFI + Core active */ | ||
41 | #define OMAP3_STATE_C2 1 /* C2 - MPU WFI + Core inactive */ | ||
42 | #define OMAP3_STATE_C3 2 /* C3 - MPU CSWR + Core inactive */ | ||
43 | #define OMAP3_STATE_C4 3 /* C4 - MPU OFF + Core inactive */ | ||
44 | #define OMAP3_STATE_C5 4 /* C5 - MPU RET + Core RET */ | ||
45 | #define OMAP3_STATE_C6 5 /* C6 - MPU OFF + Core RET */ | ||
46 | #define OMAP3_STATE_C7 6 /* C7 - MPU OFF + Core OFF */ | ||
47 | |||
48 | #define OMAP3_STATE_MAX OMAP3_STATE_C7 | ||
49 | |||
50 | #define CPUIDLE_FLAG_CHECK_BM 0x10000 /* use omap3_enter_idle_bm() */ | ||
51 | |||
52 | struct omap3_processor_cx { | ||
53 | u8 valid; | ||
54 | u8 type; | ||
55 | u32 sleep_latency; | ||
56 | u32 wakeup_latency; | ||
57 | u32 mpu_state; | ||
58 | u32 core_state; | ||
59 | u32 threshold; | ||
60 | u32 flags; | ||
61 | const char *desc; | ||
62 | }; | ||
63 | |||
64 | struct omap3_processor_cx omap3_power_states[OMAP3_MAX_STATES]; | ||
65 | struct omap3_processor_cx current_cx_state; | ||
66 | struct powerdomain *mpu_pd, *core_pd, *per_pd; | ||
67 | struct powerdomain *cam_pd; | ||
68 | |||
69 | /* | 39 | /* |
70 | * The latencies/thresholds for various C states have | 40 | * The latencies/thresholds for various C states have |
71 | * to be configured from the respective board files. | 41 | * to be configured from the respective board files. |
@@ -75,27 +45,31 @@ struct powerdomain *cam_pd; | |||
75 | */ | 45 | */ |
76 | static struct cpuidle_params cpuidle_params_table[] = { | 46 | static struct cpuidle_params cpuidle_params_table[] = { |
77 | /* C1 */ | 47 | /* C1 */ |
78 | {1, 2, 2, 5}, | 48 | {2 + 2, 5, 1}, |
79 | /* C2 */ | 49 | /* C2 */ |
80 | {1, 10, 10, 30}, | 50 | {10 + 10, 30, 1}, |
81 | /* C3 */ | 51 | /* C3 */ |
82 | {1, 50, 50, 300}, | 52 | {50 + 50, 300, 1}, |
83 | /* C4 */ | 53 | /* C4 */ |
84 | {1, 1500, 1800, 4000}, | 54 | {1500 + 1800, 4000, 1}, |
85 | /* C5 */ | 55 | /* C5 */ |
86 | {1, 2500, 7500, 12000}, | 56 | {2500 + 7500, 12000, 1}, |
87 | /* C6 */ | 57 | /* C6 */ |
88 | {1, 3000, 8500, 15000}, | 58 | {3000 + 8500, 15000, 1}, |
89 | /* C7 */ | 59 | /* C7 */ |
90 | {1, 10000, 30000, 300000}, | 60 | {10000 + 30000, 300000, 1}, |
91 | }; | 61 | }; |
62 | #define OMAP3_NUM_STATES ARRAY_SIZE(cpuidle_params_table) | ||
92 | 63 | ||
93 | static int omap3_idle_bm_check(void) | 64 | /* Mach specific information to be recorded in the C-state driver_data */ |
94 | { | 65 | struct omap3_idle_statedata { |
95 | if (!omap3_can_sleep()) | 66 | u32 mpu_state; |
96 | return 1; | 67 | u32 core_state; |
97 | return 0; | 68 | u8 valid; |
98 | } | 69 | }; |
70 | struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES]; | ||
71 | |||
72 | struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; | ||
99 | 73 | ||
100 | static int _cpuidle_allow_idle(struct powerdomain *pwrdm, | 74 | static int _cpuidle_allow_idle(struct powerdomain *pwrdm, |
101 | struct clockdomain *clkdm) | 75 | struct clockdomain *clkdm) |
@@ -122,12 +96,10 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm, | |||
122 | static int omap3_enter_idle(struct cpuidle_device *dev, | 96 | static int omap3_enter_idle(struct cpuidle_device *dev, |
123 | struct cpuidle_state *state) | 97 | struct cpuidle_state *state) |
124 | { | 98 | { |
125 | struct omap3_processor_cx *cx = cpuidle_get_statedata(state); | 99 | struct omap3_idle_statedata *cx = cpuidle_get_statedata(state); |
126 | struct timespec ts_preidle, ts_postidle, ts_idle; | 100 | struct timespec ts_preidle, ts_postidle, ts_idle; |
127 | u32 mpu_state = cx->mpu_state, core_state = cx->core_state; | 101 | u32 mpu_state = cx->mpu_state, core_state = cx->core_state; |
128 | 102 | ||
129 | current_cx_state = *cx; | ||
130 | |||
131 | /* Used to keep track of the total time in idle */ | 103 | /* Used to keep track of the total time in idle */ |
132 | getnstimeofday(&ts_preidle); | 104 | getnstimeofday(&ts_preidle); |
133 | 105 | ||
@@ -140,7 +112,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, | |||
140 | if (omap_irq_pending() || need_resched()) | 112 | if (omap_irq_pending() || need_resched()) |
141 | goto return_sleep_time; | 113 | goto return_sleep_time; |
142 | 114 | ||
143 | if (cx->type == OMAP3_STATE_C1) { | 115 | /* Deny idle for C1 */ |
116 | if (state == &dev->states[0]) { | ||
144 | pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle); | 117 | pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle); |
145 | pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle); | 118 | pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle); |
146 | } | 119 | } |
@@ -148,7 +121,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, | |||
148 | /* Execute ARM wfi */ | 121 | /* Execute ARM wfi */ |
149 | omap_sram_idle(); | 122 | omap_sram_idle(); |
150 | 123 | ||
151 | if (cx->type == OMAP3_STATE_C1) { | 124 | /* Re-allow idle for C1 */ |
125 | if (state == &dev->states[0]) { | ||
152 | pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle); | 126 | pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle); |
153 | pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle); | 127 | pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle); |
154 | } | 128 | } |
@@ -164,41 +138,53 @@ return_sleep_time: | |||
164 | } | 138 | } |
165 | 139 | ||
166 | /** | 140 | /** |
167 | * next_valid_state - Find next valid c-state | 141 | * next_valid_state - Find next valid C-state |
168 | * @dev: cpuidle device | 142 | * @dev: cpuidle device |
169 | * @state: Currently selected c-state | 143 | * @state: Currently selected C-state |
170 | * | 144 | * |
171 | * If the current state is valid, it is returned back to the caller. | 145 | * If the current state is valid, it is returned back to the caller. |
172 | * Else, this function searches for a lower c-state which is still | 146 | * Else, this function searches for a lower c-state which is still |
173 | * valid (as defined in omap3_power_states[]). | 147 | * valid. |
148 | * | ||
149 | * A state is valid if the 'valid' field is enabled and | ||
150 | * if it satisfies the enable_off_mode condition. | ||
174 | */ | 151 | */ |
175 | static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, | 152 | static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, |
176 | struct cpuidle_state *curr) | 153 | struct cpuidle_state *curr) |
177 | { | 154 | { |
178 | struct cpuidle_state *next = NULL; | 155 | struct cpuidle_state *next = NULL; |
179 | struct omap3_processor_cx *cx; | 156 | struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr); |
157 | u32 mpu_deepest_state = PWRDM_POWER_RET; | ||
158 | u32 core_deepest_state = PWRDM_POWER_RET; | ||
180 | 159 | ||
181 | cx = (struct omap3_processor_cx *)cpuidle_get_statedata(curr); | 160 | if (enable_off_mode) { |
161 | mpu_deepest_state = PWRDM_POWER_OFF; | ||
162 | /* | ||
163 | * Erratum i583: valid for ES rev < ES1.2 on 3630. | ||
164 | * CORE OFF mode is not supported in a stable form, restrict | ||
165 | * instead the CORE state to RET. | ||
166 | */ | ||
167 | if (!IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) | ||
168 | core_deepest_state = PWRDM_POWER_OFF; | ||
169 | } | ||
182 | 170 | ||
183 | /* Check if current state is valid */ | 171 | /* Check if current state is valid */ |
184 | if (cx->valid) { | 172 | if ((cx->valid) && |
173 | (cx->mpu_state >= mpu_deepest_state) && | ||
174 | (cx->core_state >= core_deepest_state)) { | ||
185 | return curr; | 175 | return curr; |
186 | } else { | 176 | } else { |
187 | u8 idx = OMAP3_STATE_MAX; | 177 | int idx = OMAP3_NUM_STATES - 1; |
188 | 178 | ||
189 | /* | 179 | /* Reach the current state starting at highest C-state */ |
190 | * Reach the current state starting at highest C-state | 180 | for (; idx >= 0; idx--) { |
191 | */ | ||
192 | for (; idx >= OMAP3_STATE_C1; idx--) { | ||
193 | if (&dev->states[idx] == curr) { | 181 | if (&dev->states[idx] == curr) { |
194 | next = &dev->states[idx]; | 182 | next = &dev->states[idx]; |
195 | break; | 183 | break; |
196 | } | 184 | } |
197 | } | 185 | } |
198 | 186 | ||
199 | /* | 187 | /* Should never hit this condition */ |
200 | * Should never hit this condition. | ||
201 | */ | ||
202 | WARN_ON(next == NULL); | 188 | WARN_ON(next == NULL); |
203 | 189 | ||
204 | /* | 190 | /* |
@@ -206,17 +192,17 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, | |||
206 | * Start search from the next (lower) state. | 192 | * Start search from the next (lower) state. |
207 | */ | 193 | */ |
208 | idx--; | 194 | idx--; |
209 | for (; idx >= OMAP3_STATE_C1; idx--) { | 195 | for (; idx >= 0; idx--) { |
210 | struct omap3_processor_cx *cx; | ||
211 | |||
212 | cx = cpuidle_get_statedata(&dev->states[idx]); | 196 | cx = cpuidle_get_statedata(&dev->states[idx]); |
213 | if (cx->valid) { | 197 | if ((cx->valid) && |
198 | (cx->mpu_state >= mpu_deepest_state) && | ||
199 | (cx->core_state >= core_deepest_state)) { | ||
214 | next = &dev->states[idx]; | 200 | next = &dev->states[idx]; |
215 | break; | 201 | break; |
216 | } | 202 | } |
217 | } | 203 | } |
218 | /* | 204 | /* |
219 | * C1 and C2 are always valid. | 205 | * C1 is always valid. |
220 | * So, no need to check for 'next==NULL' outside this loop. | 206 | * So, no need to check for 'next==NULL' outside this loop. |
221 | */ | 207 | */ |
222 | } | 208 | } |
@@ -229,36 +215,22 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, | |||
229 | * @dev: cpuidle device | 215 | * @dev: cpuidle device |
230 | * @state: The target state to be programmed | 216 | * @state: The target state to be programmed |
231 | * | 217 | * |
232 | * Used for C states with CPUIDLE_FLAG_CHECK_BM flag set. This | 218 | * This function checks for any pending activity and then programs |
233 | * function checks for any pending activity and then programs the | 219 | * the device to the specified or a safer state. |
234 | * device to the specified or a safer state. | ||
235 | */ | 220 | */ |
236 | static int omap3_enter_idle_bm(struct cpuidle_device *dev, | 221 | static int omap3_enter_idle_bm(struct cpuidle_device *dev, |
237 | struct cpuidle_state *state) | 222 | struct cpuidle_state *state) |
238 | { | 223 | { |
239 | struct cpuidle_state *new_state = next_valid_state(dev, state); | 224 | struct cpuidle_state *new_state; |
240 | u32 core_next_state, per_next_state = 0, per_saved_state = 0; | 225 | u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state; |
241 | u32 cam_state; | 226 | struct omap3_idle_statedata *cx; |
242 | struct omap3_processor_cx *cx; | ||
243 | int ret; | 227 | int ret; |
244 | 228 | ||
245 | if ((state->flags & CPUIDLE_FLAG_CHECK_BM) && omap3_idle_bm_check()) { | 229 | if (!omap3_can_sleep()) { |
246 | BUG_ON(!dev->safe_state); | ||
247 | new_state = dev->safe_state; | 230 | new_state = dev->safe_state; |
248 | goto select_state; | 231 | goto select_state; |
249 | } | 232 | } |
250 | 233 | ||
251 | cx = cpuidle_get_statedata(state); | ||
252 | core_next_state = cx->core_state; | ||
253 | |||
254 | /* | ||
255 | * FIXME: we currently manage device-specific idle states | ||
256 | * for PER and CORE in combination with CPU-specific | ||
257 | * idle states. This is wrong, and device-specific | ||
258 | * idle management needs to be separated out into | ||
259 | * its own code. | ||
260 | */ | ||
261 | |||
262 | /* | 234 | /* |
263 | * Prevent idle completely if CAM is active. | 235 | * Prevent idle completely if CAM is active. |
264 | * CAM does not have wakeup capability in OMAP3. | 236 | * CAM does not have wakeup capability in OMAP3. |
@@ -270,9 +242,19 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, | |||
270 | } | 242 | } |
271 | 243 | ||
272 | /* | 244 | /* |
245 | * FIXME: we currently manage device-specific idle states | ||
246 | * for PER and CORE in combination with CPU-specific | ||
247 | * idle states. This is wrong, and device-specific | ||
248 | * idle management needs to be separated out into | ||
249 | * its own code. | ||
250 | */ | ||
251 | |||
252 | /* | ||
273 | * Prevent PER off if CORE is not in retention or off as this | 253 | * Prevent PER off if CORE is not in retention or off as this |
274 | * would disable PER wakeups completely. | 254 | * would disable PER wakeups completely. |
275 | */ | 255 | */ |
256 | cx = cpuidle_get_statedata(state); | ||
257 | core_next_state = cx->core_state; | ||
276 | per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd); | 258 | per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd); |
277 | if ((per_next_state == PWRDM_POWER_OFF) && | 259 | if ((per_next_state == PWRDM_POWER_OFF) && |
278 | (core_next_state > PWRDM_POWER_RET)) | 260 | (core_next_state > PWRDM_POWER_RET)) |
@@ -282,6 +264,8 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, | |||
282 | if (per_next_state != per_saved_state) | 264 | if (per_next_state != per_saved_state) |
283 | pwrdm_set_next_pwrst(per_pd, per_next_state); | 265 | pwrdm_set_next_pwrst(per_pd, per_next_state); |
284 | 266 | ||
267 | new_state = next_valid_state(dev, state); | ||
268 | |||
285 | select_state: | 269 | select_state: |
286 | dev->last_state = new_state; | 270 | dev->last_state = new_state; |
287 | ret = omap3_enter_idle(dev, new_state); | 271 | ret = omap3_enter_idle(dev, new_state); |
@@ -295,31 +279,6 @@ select_state: | |||
295 | 279 | ||
296 | DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); | 280 | DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); |
297 | 281 | ||
298 | /** | ||
299 | * omap3_cpuidle_update_states() - Update the cpuidle states | ||
300 | * @mpu_deepest_state: Enable states up to and including this for mpu domain | ||
301 | * @core_deepest_state: Enable states up to and including this for core domain | ||
302 | * | ||
303 | * This goes through the list of states available and enables and disables the | ||
304 | * validity of C states based on deepest state that can be achieved for the | ||
305 | * variable domain | ||
306 | */ | ||
307 | void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state) | ||
308 | { | ||
309 | int i; | ||
310 | |||
311 | for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { | ||
312 | struct omap3_processor_cx *cx = &omap3_power_states[i]; | ||
313 | |||
314 | if ((cx->mpu_state >= mpu_deepest_state) && | ||
315 | (cx->core_state >= core_deepest_state)) { | ||
316 | cx->valid = 1; | ||
317 | } else { | ||
318 | cx->valid = 0; | ||
319 | } | ||
320 | } | ||
321 | } | ||
322 | |||
323 | void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) | 282 | void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) |
324 | { | 283 | { |
325 | int i; | 284 | int i; |
@@ -327,212 +286,109 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) | |||
327 | if (!cpuidle_board_params) | 286 | if (!cpuidle_board_params) |
328 | return; | 287 | return; |
329 | 288 | ||
330 | for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { | 289 | for (i = 0; i < OMAP3_NUM_STATES; i++) { |
331 | cpuidle_params_table[i].valid = | 290 | cpuidle_params_table[i].valid = cpuidle_board_params[i].valid; |
332 | cpuidle_board_params[i].valid; | 291 | cpuidle_params_table[i].exit_latency = |
333 | cpuidle_params_table[i].sleep_latency = | 292 | cpuidle_board_params[i].exit_latency; |
334 | cpuidle_board_params[i].sleep_latency; | 293 | cpuidle_params_table[i].target_residency = |
335 | cpuidle_params_table[i].wake_latency = | 294 | cpuidle_board_params[i].target_residency; |
336 | cpuidle_board_params[i].wake_latency; | ||
337 | cpuidle_params_table[i].threshold = | ||
338 | cpuidle_board_params[i].threshold; | ||
339 | } | 295 | } |
340 | return; | 296 | return; |
341 | } | 297 | } |
342 | 298 | ||
343 | /* omap3_init_power_states - Initialises the OMAP3 specific C states. | ||
344 | * | ||
345 | * Below is the description of each C state. | ||
346 | * C1 . MPU WFI + Core active | ||
347 | * C2 . MPU WFI + Core inactive | ||
348 | * C3 . MPU CSWR + Core inactive | ||
349 | * C4 . MPU OFF + Core inactive | ||
350 | * C5 . MPU CSWR + Core CSWR | ||
351 | * C6 . MPU OFF + Core CSWR | ||
352 | * C7 . MPU OFF + Core OFF | ||
353 | */ | ||
354 | void omap_init_power_states(void) | ||
355 | { | ||
356 | /* C1 . MPU WFI + Core active */ | ||
357 | omap3_power_states[OMAP3_STATE_C1].valid = | ||
358 | cpuidle_params_table[OMAP3_STATE_C1].valid; | ||
359 | omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1; | ||
360 | omap3_power_states[OMAP3_STATE_C1].sleep_latency = | ||
361 | cpuidle_params_table[OMAP3_STATE_C1].sleep_latency; | ||
362 | omap3_power_states[OMAP3_STATE_C1].wakeup_latency = | ||
363 | cpuidle_params_table[OMAP3_STATE_C1].wake_latency; | ||
364 | omap3_power_states[OMAP3_STATE_C1].threshold = | ||
365 | cpuidle_params_table[OMAP3_STATE_C1].threshold; | ||
366 | omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON; | ||
367 | omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON; | ||
368 | omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID; | ||
369 | omap3_power_states[OMAP3_STATE_C1].desc = "MPU ON + CORE ON"; | ||
370 | |||
371 | /* C2 . MPU WFI + Core inactive */ | ||
372 | omap3_power_states[OMAP3_STATE_C2].valid = | ||
373 | cpuidle_params_table[OMAP3_STATE_C2].valid; | ||
374 | omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2; | ||
375 | omap3_power_states[OMAP3_STATE_C2].sleep_latency = | ||
376 | cpuidle_params_table[OMAP3_STATE_C2].sleep_latency; | ||
377 | omap3_power_states[OMAP3_STATE_C2].wakeup_latency = | ||
378 | cpuidle_params_table[OMAP3_STATE_C2].wake_latency; | ||
379 | omap3_power_states[OMAP3_STATE_C2].threshold = | ||
380 | cpuidle_params_table[OMAP3_STATE_C2].threshold; | ||
381 | omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON; | ||
382 | omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON; | ||
383 | omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID | | ||
384 | CPUIDLE_FLAG_CHECK_BM; | ||
385 | omap3_power_states[OMAP3_STATE_C2].desc = "MPU ON + CORE ON"; | ||
386 | |||
387 | /* C3 . MPU CSWR + Core inactive */ | ||
388 | omap3_power_states[OMAP3_STATE_C3].valid = | ||
389 | cpuidle_params_table[OMAP3_STATE_C3].valid; | ||
390 | omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3; | ||
391 | omap3_power_states[OMAP3_STATE_C3].sleep_latency = | ||
392 | cpuidle_params_table[OMAP3_STATE_C3].sleep_latency; | ||
393 | omap3_power_states[OMAP3_STATE_C3].wakeup_latency = | ||
394 | cpuidle_params_table[OMAP3_STATE_C3].wake_latency; | ||
395 | omap3_power_states[OMAP3_STATE_C3].threshold = | ||
396 | cpuidle_params_table[OMAP3_STATE_C3].threshold; | ||
397 | omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET; | ||
398 | omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON; | ||
399 | omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID | | ||
400 | CPUIDLE_FLAG_CHECK_BM; | ||
401 | omap3_power_states[OMAP3_STATE_C3].desc = "MPU RET + CORE ON"; | ||
402 | |||
403 | /* C4 . MPU OFF + Core inactive */ | ||
404 | omap3_power_states[OMAP3_STATE_C4].valid = | ||
405 | cpuidle_params_table[OMAP3_STATE_C4].valid; | ||
406 | omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4; | ||
407 | omap3_power_states[OMAP3_STATE_C4].sleep_latency = | ||
408 | cpuidle_params_table[OMAP3_STATE_C4].sleep_latency; | ||
409 | omap3_power_states[OMAP3_STATE_C4].wakeup_latency = | ||
410 | cpuidle_params_table[OMAP3_STATE_C4].wake_latency; | ||
411 | omap3_power_states[OMAP3_STATE_C4].threshold = | ||
412 | cpuidle_params_table[OMAP3_STATE_C4].threshold; | ||
413 | omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF; | ||
414 | omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON; | ||
415 | omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID | | ||
416 | CPUIDLE_FLAG_CHECK_BM; | ||
417 | omap3_power_states[OMAP3_STATE_C4].desc = "MPU OFF + CORE ON"; | ||
418 | |||
419 | /* C5 . MPU CSWR + Core CSWR*/ | ||
420 | omap3_power_states[OMAP3_STATE_C5].valid = | ||
421 | cpuidle_params_table[OMAP3_STATE_C5].valid; | ||
422 | omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5; | ||
423 | omap3_power_states[OMAP3_STATE_C5].sleep_latency = | ||
424 | cpuidle_params_table[OMAP3_STATE_C5].sleep_latency; | ||
425 | omap3_power_states[OMAP3_STATE_C5].wakeup_latency = | ||
426 | cpuidle_params_table[OMAP3_STATE_C5].wake_latency; | ||
427 | omap3_power_states[OMAP3_STATE_C5].threshold = | ||
428 | cpuidle_params_table[OMAP3_STATE_C5].threshold; | ||
429 | omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET; | ||
430 | omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET; | ||
431 | omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID | | ||
432 | CPUIDLE_FLAG_CHECK_BM; | ||
433 | omap3_power_states[OMAP3_STATE_C5].desc = "MPU RET + CORE RET"; | ||
434 | |||
435 | /* C6 . MPU OFF + Core CSWR */ | ||
436 | omap3_power_states[OMAP3_STATE_C6].valid = | ||
437 | cpuidle_params_table[OMAP3_STATE_C6].valid; | ||
438 | omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6; | ||
439 | omap3_power_states[OMAP3_STATE_C6].sleep_latency = | ||
440 | cpuidle_params_table[OMAP3_STATE_C6].sleep_latency; | ||
441 | omap3_power_states[OMAP3_STATE_C6].wakeup_latency = | ||
442 | cpuidle_params_table[OMAP3_STATE_C6].wake_latency; | ||
443 | omap3_power_states[OMAP3_STATE_C6].threshold = | ||
444 | cpuidle_params_table[OMAP3_STATE_C6].threshold; | ||
445 | omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF; | ||
446 | omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET; | ||
447 | omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID | | ||
448 | CPUIDLE_FLAG_CHECK_BM; | ||
449 | omap3_power_states[OMAP3_STATE_C6].desc = "MPU OFF + CORE RET"; | ||
450 | |||
451 | /* C7 . MPU OFF + Core OFF */ | ||
452 | omap3_power_states[OMAP3_STATE_C7].valid = | ||
453 | cpuidle_params_table[OMAP3_STATE_C7].valid; | ||
454 | omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7; | ||
455 | omap3_power_states[OMAP3_STATE_C7].sleep_latency = | ||
456 | cpuidle_params_table[OMAP3_STATE_C7].sleep_latency; | ||
457 | omap3_power_states[OMAP3_STATE_C7].wakeup_latency = | ||
458 | cpuidle_params_table[OMAP3_STATE_C7].wake_latency; | ||
459 | omap3_power_states[OMAP3_STATE_C7].threshold = | ||
460 | cpuidle_params_table[OMAP3_STATE_C7].threshold; | ||
461 | omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF; | ||
462 | omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF; | ||
463 | omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID | | ||
464 | CPUIDLE_FLAG_CHECK_BM; | ||
465 | omap3_power_states[OMAP3_STATE_C7].desc = "MPU OFF + CORE OFF"; | ||
466 | |||
467 | /* | ||
468 | * Erratum i583: implementation for ES rev < ES1.2 on 3630. We cannot | ||
469 | * enable OFF mode in a stable form for previous revisions. | ||
470 | * We disable C7 state as a result. | ||
471 | */ | ||
472 | if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { | ||
473 | omap3_power_states[OMAP3_STATE_C7].valid = 0; | ||
474 | cpuidle_params_table[OMAP3_STATE_C7].valid = 0; | ||
475 | pr_warn("%s: core off state C7 disabled due to i583\n", | ||
476 | __func__); | ||
477 | } | ||
478 | } | ||
479 | |||
480 | struct cpuidle_driver omap3_idle_driver = { | 299 | struct cpuidle_driver omap3_idle_driver = { |
481 | .name = "omap3_idle", | 300 | .name = "omap3_idle", |
482 | .owner = THIS_MODULE, | 301 | .owner = THIS_MODULE, |
483 | }; | 302 | }; |
484 | 303 | ||
304 | /* Helper to fill the C-state common data and register the driver_data */ | ||
305 | static inline struct omap3_idle_statedata *_fill_cstate( | ||
306 | struct cpuidle_device *dev, | ||
307 | int idx, const char *descr) | ||
308 | { | ||
309 | struct omap3_idle_statedata *cx = &omap3_idle_data[idx]; | ||
310 | struct cpuidle_state *state = &dev->states[idx]; | ||
311 | |||
312 | state->exit_latency = cpuidle_params_table[idx].exit_latency; | ||
313 | state->target_residency = cpuidle_params_table[idx].target_residency; | ||
314 | state->flags = CPUIDLE_FLAG_TIME_VALID; | ||
315 | state->enter = omap3_enter_idle_bm; | ||
316 | cx->valid = cpuidle_params_table[idx].valid; | ||
317 | sprintf(state->name, "C%d", idx + 1); | ||
318 | strncpy(state->desc, descr, CPUIDLE_DESC_LEN); | ||
319 | cpuidle_set_statedata(state, cx); | ||
320 | |||
321 | return cx; | ||
322 | } | ||
323 | |||
485 | /** | 324 | /** |
486 | * omap3_idle_init - Init routine for OMAP3 idle | 325 | * omap3_idle_init - Init routine for OMAP3 idle |
487 | * | 326 | * |
488 | * Registers the OMAP3 specific cpuidle driver with the cpuidle | 327 | * Registers the OMAP3 specific cpuidle driver to the cpuidle |
489 | * framework with the valid set of states. | 328 | * framework with the valid set of states. |
490 | */ | 329 | */ |
491 | int __init omap3_idle_init(void) | 330 | int __init omap3_idle_init(void) |
492 | { | 331 | { |
493 | int i, count = 0; | ||
494 | struct omap3_processor_cx *cx; | ||
495 | struct cpuidle_state *state; | ||
496 | struct cpuidle_device *dev; | 332 | struct cpuidle_device *dev; |
333 | struct omap3_idle_statedata *cx; | ||
497 | 334 | ||
498 | mpu_pd = pwrdm_lookup("mpu_pwrdm"); | 335 | mpu_pd = pwrdm_lookup("mpu_pwrdm"); |
499 | core_pd = pwrdm_lookup("core_pwrdm"); | 336 | core_pd = pwrdm_lookup("core_pwrdm"); |
500 | per_pd = pwrdm_lookup("per_pwrdm"); | 337 | per_pd = pwrdm_lookup("per_pwrdm"); |
501 | cam_pd = pwrdm_lookup("cam_pwrdm"); | 338 | cam_pd = pwrdm_lookup("cam_pwrdm"); |
502 | 339 | ||
503 | omap_init_power_states(); | ||
504 | cpuidle_register_driver(&omap3_idle_driver); | 340 | cpuidle_register_driver(&omap3_idle_driver); |
505 | |||
506 | dev = &per_cpu(omap3_idle_dev, smp_processor_id()); | 341 | dev = &per_cpu(omap3_idle_dev, smp_processor_id()); |
507 | 342 | ||
508 | for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { | 343 | /* C1 . MPU WFI + Core active */ |
509 | cx = &omap3_power_states[i]; | 344 | cx = _fill_cstate(dev, 0, "MPU ON + CORE ON"); |
510 | state = &dev->states[count]; | 345 | (&dev->states[0])->enter = omap3_enter_idle; |
511 | 346 | dev->safe_state = &dev->states[0]; | |
512 | if (!cx->valid) | 347 | cx->valid = 1; /* C1 is always valid */ |
513 | continue; | 348 | cx->mpu_state = PWRDM_POWER_ON; |
514 | cpuidle_set_statedata(state, cx); | 349 | cx->core_state = PWRDM_POWER_ON; |
515 | state->exit_latency = cx->sleep_latency + cx->wakeup_latency; | ||
516 | state->target_residency = cx->threshold; | ||
517 | state->flags = cx->flags; | ||
518 | state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ? | ||
519 | omap3_enter_idle_bm : omap3_enter_idle; | ||
520 | if (cx->type == OMAP3_STATE_C1) | ||
521 | dev->safe_state = state; | ||
522 | sprintf(state->name, "C%d", count+1); | ||
523 | strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); | ||
524 | count++; | ||
525 | } | ||
526 | 350 | ||
527 | if (!count) | 351 | /* C2 . MPU WFI + Core inactive */ |
528 | return -EINVAL; | 352 | cx = _fill_cstate(dev, 1, "MPU ON + CORE ON"); |
529 | dev->state_count = count; | 353 | cx->mpu_state = PWRDM_POWER_ON; |
354 | cx->core_state = PWRDM_POWER_ON; | ||
355 | |||
356 | /* C3 . MPU CSWR + Core inactive */ | ||
357 | cx = _fill_cstate(dev, 2, "MPU RET + CORE ON"); | ||
358 | cx->mpu_state = PWRDM_POWER_RET; | ||
359 | cx->core_state = PWRDM_POWER_ON; | ||
530 | 360 | ||
531 | if (enable_off_mode) | 361 | /* C4 . MPU OFF + Core inactive */ |
532 | omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF); | 362 | cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON"); |
533 | else | 363 | cx->mpu_state = PWRDM_POWER_OFF; |
534 | omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET); | 364 | cx->core_state = PWRDM_POWER_ON; |
365 | |||
366 | /* C5 . MPU RET + Core RET */ | ||
367 | cx = _fill_cstate(dev, 4, "MPU RET + CORE RET"); | ||
368 | cx->mpu_state = PWRDM_POWER_RET; | ||
369 | cx->core_state = PWRDM_POWER_RET; | ||
370 | |||
371 | /* C6 . MPU OFF + Core RET */ | ||
372 | cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET"); | ||
373 | cx->mpu_state = PWRDM_POWER_OFF; | ||
374 | cx->core_state = PWRDM_POWER_RET; | ||
375 | |||
376 | /* C7 . MPU OFF + Core OFF */ | ||
377 | cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF"); | ||
378 | /* | ||
379 | * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot | ||
380 | * enable OFF mode in a stable form for previous revisions. | ||
381 | * We disable C7 state as a result. | ||
382 | */ | ||
383 | if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { | ||
384 | cx->valid = 0; | ||
385 | pr_warn("%s: core off state C7 disabled due to i583\n", | ||
386 | __func__); | ||
387 | } | ||
388 | cx->mpu_state = PWRDM_POWER_OFF; | ||
389 | cx->core_state = PWRDM_POWER_OFF; | ||
535 | 390 | ||
391 | dev->state_count = OMAP3_NUM_STATES; | ||
536 | if (cpuidle_register_device(dev)) { | 392 | if (cpuidle_register_device(dev)) { |
537 | printk(KERN_ERR "%s: CPUidle register device failed\n", | 393 | printk(KERN_ERR "%s: CPUidle register device failed\n", |
538 | __func__); | 394 | __func__); |
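The rework above replaces seven near-identical per-state initializers with the `_fill_cstate()` helper, and hangs a small `struct omap3_idle_statedata` off each C-state in place of the old `omap3_processor_cx` array. A minimal sketch of the statedata round-trip this relies on (2.6.39-era cpuidle API):

```c
/* At init time, _fill_cstate() attaches the per-state data: */
cpuidle_set_statedata(&dev->states[idx], &omap3_idle_data[idx]);

/* At idle time, the enter hooks read it back: */
struct omap3_idle_statedata *cx = cpuidle_get_statedata(state);
u32 mpu_state = cx->mpu_state, core_state = cx->core_state;
```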
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 797bfd12b643..45bcfce77352 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h | |||
@@ -36,11 +36,16 @@ static inline int omap4_opp_init(void) | |||
36 | } | 36 | } |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | /* | ||
40 | * cpuidle mach specific parameters | ||
41 | * | ||
42 | * The board code can override the default C-states definition using | ||
43 | * omap3_pm_init_cpuidle | ||
44 | */ | ||
39 | struct cpuidle_params { | 45 | struct cpuidle_params { |
40 | u8 valid; | 46 | u32 exit_latency; /* exit_latency = sleep + wake-up latencies */ |
41 | u32 sleep_latency; | 47 | u32 target_residency; |
42 | u32 wake_latency; | 48 | u8 valid; /* validates the C-state */ |
43 | u32 threshold; | ||
44 | }; | 49 | }; |
45 | 50 | ||
46 | #if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE) | 51 | #if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE) |
@@ -73,10 +78,6 @@ extern u32 sleep_while_idle; | |||
73 | #define sleep_while_idle 0 | 78 | #define sleep_while_idle 0 |
74 | #endif | 79 | #endif |
75 | 80 | ||
76 | #if defined(CONFIG_CPU_IDLE) | ||
77 | extern void omap3_cpuidle_update_states(u32, u32); | ||
78 | #endif | ||
79 | |||
80 | #if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS) | 81 | #if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS) |
81 | extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev); | 82 | extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev); |
82 | extern int pm_dbg_regset_save(int reg_set); | 83 | extern int pm_dbg_regset_save(int reg_set); |
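Per the new comment in pm.h, board code overrides the default C-state table through `omap3_pm_init_cpuidle()`, as rx51 does above (the 3430sdp override is dropped because it merely duplicated the defaults). A hypothetical board file using the new three-field layout:

```c
/* Hypothetical board override; values are board-measured, order C1..C7. */
static struct cpuidle_params my_board_cpuidle_params[] = {
	/* C1 */ {2 + 2,         5,      1},
	/* C2 */ {10 + 10,       30,     1},
	/* C3 */ {50 + 50,       300,    1},
	/* C4 */ {1500 + 1800,   4000,   1},
	/* C5 */ {2500 + 7500,   12000,  1},
	/* C6 */ {3000 + 8500,   15000,  1},
	/* C7 */ {10000 + 30000, 300000, 0},	/* e.g. keep C7 disabled */
};

static void __init my_board_init(void)
{
	omap3_pm_init_cpuidle(my_board_cpuidle_params);
}
```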
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 0c5e3a46a3ad..c155c9d1c82c 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c | |||
@@ -779,18 +779,6 @@ void omap3_pm_off_mode_enable(int enable) | |||
779 | else | 779 | else |
780 | state = PWRDM_POWER_RET; | 780 | state = PWRDM_POWER_RET; |
781 | 781 | ||
782 | #ifdef CONFIG_CPU_IDLE | ||
783 | /* | ||
784 | * Erratum i583: implementation for ES rev < ES1.2 on 3630. We cannot | ||
785 | * enable OFF mode in a stable form for previous revisions, restrict | ||
786 | * instead to RET | ||
787 | */ | ||
788 | if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) | ||
789 | omap3_cpuidle_update_states(state, PWRDM_POWER_RET); | ||
790 | else | ||
791 | omap3_cpuidle_update_states(state, state); | ||
792 | #endif | ||
793 | |||
794 | list_for_each_entry(pwrst, &pwrst_list, node) { | 782 | list_for_each_entry(pwrst, &pwrst_list, node) { |
795 | if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) && | 783 | if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) && |
796 | pwrst->pwrdm == core_pwrdm && | 784 | pwrst->pwrdm == core_pwrdm && |
@@ -895,8 +883,6 @@ static int __init omap3_pm_init(void) | |||
895 | 883 | ||
896 | pm_errata_configure(); | 884 | pm_errata_configure(); |
897 | 885 | ||
898 | printk(KERN_ERR "Power Management for TI OMAP3.\n"); | ||
899 | |||
900 | /* XXX prcm_setup_regs needs to be before enabling hw | 886 | /* XXX prcm_setup_regs needs to be before enabling hw |
901 | * supervised mode for powerdomains */ | 887 | * supervised mode for powerdomains */ |
902 | prcm_setup_regs(); | 888 | prcm_setup_regs(); |
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 425dfa5d6e12..bb571bcdb8f2 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c | |||
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void) | |||
325 | size = 0x1f; | 325 | size = 0x1f; |
326 | } | 326 | } |
327 | 327 | ||
328 | gpch->regs = ioremap_nocache(AR7_REGS_GPIO, | 328 | gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); |
329 | AR7_REGS_GPIO + 0x10); | ||
330 | |||
331 | if (!gpch->regs) { | 329 | if (!gpch->regs) { |
332 | printk(KERN_ERR "%s: failed to ioremap regs\n", | 330 | printk(KERN_ERR "%s: failed to ioremap regs\n", |
333 | gpch->chip.label); | 331 | gpch->chip.label); |
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 655f849bd08d..7aa37ddfca4b 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h | |||
@@ -5,7 +5,9 @@ | |||
5 | #include <asm/cache.h> | 5 | #include <asm/cache.h> |
6 | #include <asm-generic/dma-coherent.h> | 6 | #include <asm-generic/dma-coherent.h> |
7 | 7 | ||
8 | #ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ | ||
8 | #include <dma-coherence.h> | 9 | #include <dma-coherence.h> |
10 | #endif | ||
9 | 11 | ||
10 | extern struct dma_map_ops *mips_dma_map_ops; | 12 | extern struct dma_map_ops *mips_dma_map_ops; |
11 | 13 | ||
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 71350f7f2d88..e9b3af27d844 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c | |||
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs) | |||
374 | unsigned long dvpret = dvpe(); | 374 | unsigned long dvpret = dvpe(); |
375 | #endif /* CONFIG_MIPS_MT_SMTC */ | 375 | #endif /* CONFIG_MIPS_MT_SMTC */ |
376 | 376 | ||
377 | notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV); | 377 | if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) |
378 | sig = 0; | ||
378 | 379 | ||
379 | console_verbose(); | 380 | console_verbose(); |
380 | spin_lock_irq(&die_lock); | 381 | spin_lock_irq(&die_lock); |
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs) | |||
383 | mips_mt_regdump(dvpret); | 384 | mips_mt_regdump(dvpret); |
384 | #endif /* CONFIG_MIPS_MT_SMTC */ | 385 | #endif /* CONFIG_MIPS_MT_SMTC */ |
385 | 386 | ||
386 | if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) | ||
387 | sig = 0; | ||
388 | |||
389 | printk("%s[#%d]:\n", str, ++die_counter); | 387 | printk("%s[#%d]:\n", str, ++die_counter); |
390 | show_registers(regs); | 388 | show_registers(regs); |
391 | add_taint(TAINT_DIE); | 389 | add_taint(TAINT_DIE); |
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 37de05d595e7..6c47dfeb7be3 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c | |||
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void) | |||
185 | struct resource *r; | 185 | struct resource *r; |
186 | 186 | ||
187 | r = rb532_gpio_reg0_res; | 187 | r = rb532_gpio_reg0_res; |
188 | rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start); | 188 | rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); |
189 | 189 | ||
190 | if (!rb532_gpio_chip->regbase) { | 190 | if (!rb532_gpio_chip->regbase) { |
191 | printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); | 191 | printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); |
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 188272934cfb..104faa8aa23c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c | |||
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { | |||
318 | .end = mpc83xx_suspend_end, | 318 | .end = mpc83xx_suspend_end, |
319 | }; | 319 | }; |
320 | 320 | ||
321 | static struct of_device_id pmc_match[]; | ||
321 | static int pmc_probe(struct platform_device *ofdev) | 322 | static int pmc_probe(struct platform_device *ofdev) |
322 | { | 323 | { |
324 | const struct of_device_id *match; | ||
323 | struct device_node *np = ofdev->dev.of_node; | 325 | struct device_node *np = ofdev->dev.of_node; |
324 | struct resource res; | 326 | struct resource res; |
325 | struct pmc_type *type; | 327 | struct pmc_type *type; |
326 | int ret = 0; | 328 | int ret = 0; |
327 | 329 | ||
328 | if (!ofdev->dev.of_match) | 330 | match = of_match_device(pmc_match, &ofdev->dev); |
331 | if (!match) | ||
329 | return -EINVAL; | 332 | return -EINVAL; |
330 | 333 | ||
331 | type = ofdev->dev.of_match->data; | 334 | type = match->data; |
332 | 335 | ||
333 | if (!of_device_is_available(np)) | 336 | if (!of_device_is_available(np)) |
334 | return -ENODEV; | 337 | return -ENODEV; |
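This hunk, the fsl_msi one below, and the sparc sabre/schizo hunks all apply the same conversion: the removed `dev.of_match` pointer is replaced by an explicit `of_match_device()` lookup, with a forward declaration so probe() can reference a match table defined further down the file. A condensed sketch of the pattern (the `foo_*` names are illustrative):

```c
static const struct of_device_id foo_match[];	/* defined below the probe */

static int foo_probe(struct platform_device *ofdev)
{
	const struct of_device_id *match;

	match = of_match_device(foo_match, &ofdev->dev);
	if (!match)
		return -EINVAL;

	/* driver-specific data hangs off the matched table entry */
	return foo_setup(ofdev, match->data);	/* foo_setup(): hypothetical */
}
```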
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index d5679dc1e20f..01cd2f089512 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c | |||
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, | |||
304 | return 0; | 304 | return 0; |
305 | } | 305 | } |
306 | 306 | ||
307 | static const struct of_device_id fsl_of_msi_ids[]; | ||
307 | static int __devinit fsl_of_msi_probe(struct platform_device *dev) | 308 | static int __devinit fsl_of_msi_probe(struct platform_device *dev) |
308 | { | 309 | { |
310 | const struct of_device_id *match; | ||
309 | struct fsl_msi *msi; | 311 | struct fsl_msi *msi; |
310 | struct resource res; | 312 | struct resource res; |
311 | int err, i, j, irq_index, count; | 313 | int err, i, j, irq_index, count; |
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) | |||
316 | u32 offset; | 318 | u32 offset; |
317 | static const u32 all_avail[] = { 0, NR_MSI_IRQS }; | 319 | static const u32 all_avail[] = { 0, NR_MSI_IRQS }; |
318 | 320 | ||
319 | if (!dev->dev.of_match) | 321 | match = of_match_device(fsl_of_msi_ids, &dev->dev); |
322 | if (!match) | ||
320 | return -EINVAL; | 323 | return -EINVAL; |
321 | features = dev->dev.of_match->data; | 324 | features = match->data; |
322 | 325 | ||
323 | printk(KERN_DEBUG "Setting up Freescale MSI support\n"); | 326 | printk(KERN_DEBUG "Setting up Freescale MSI support\n"); |
324 | 327 | ||
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index f679c57644d5..1e34f29e58bb 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c | |||
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op) | |||
165 | return 0; | 165 | return 0; |
166 | } | 166 | } |
167 | 167 | ||
168 | static struct of_device_id __initdata apc_match[] = { | 168 | static struct of_device_id apc_match[] = { |
169 | { | 169 | { |
170 | .name = APC_OBPNAME, | 170 | .name = APC_OBPNAME, |
171 | }, | 171 | }, |
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a083fc..d1840dbdaa2f 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c | |||
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, | |||
452 | sabre_scan_bus(pbm, &op->dev); | 452 | sabre_scan_bus(pbm, &op->dev); |
453 | } | 453 | } |
454 | 454 | ||
455 | static const struct of_device_id sabre_match[]; | ||
455 | static int __devinit sabre_probe(struct platform_device *op) | 456 | static int __devinit sabre_probe(struct platform_device *op) |
456 | { | 457 | { |
458 | const struct of_device_id *match; | ||
457 | const struct linux_prom64_registers *pr_regs; | 459 | const struct linux_prom64_registers *pr_regs; |
458 | struct device_node *dp = op->dev.of_node; | 460 | struct device_node *dp = op->dev.of_node; |
459 | struct pci_pbm_info *pbm; | 461 | struct pci_pbm_info *pbm; |
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) | |||
463 | const u32 *vdma; | 465 | const u32 *vdma; |
464 | u64 clear_irq; | 466 | u64 clear_irq; |
465 | 467 | ||
466 | hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); | 468 | match = of_match_device(sabre_match, &op->dev); |
469 | hummingbird_p = match && (match->data != NULL); | ||
467 | if (!hummingbird_p) { | 470 | if (!hummingbird_p) { |
468 | struct device_node *cpu_dp; | 471 | struct device_node *cpu_dp; |
469 | 472 | ||
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2063c8..283fbc329a43 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c | |||
@@ -1458,11 +1458,15 @@ out_err: | |||
1458 | return err; | 1458 | return err; |
1459 | } | 1459 | } |
1460 | 1460 | ||
1461 | static const struct of_device_id schizo_match[]; | ||
1461 | static int __devinit schizo_probe(struct platform_device *op) | 1462 | static int __devinit schizo_probe(struct platform_device *op) |
1462 | { | 1463 | { |
1463 | if (!op->dev.of_match) | 1464 | const struct of_device_id *match; |
1465 | |||
1466 | match = of_match_device(schizo_match, &op->dev); | ||
1467 | if (!match) | ||
1464 | return -EINVAL; | 1468 | return -EINVAL; |
1465 | return __schizo_init(op, (unsigned long) op->dev.of_match->data); | 1469 | return __schizo_init(op, (unsigned long)match->data); |
1466 | } | 1470 | } |
1467 | 1471 | ||
1468 | /* The ordering of this table is very important. Some Tomatillo | 1472 | /* The ordering of this table is very important. Some Tomatillo |
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index 93d7b4465f8d..6a585d393580 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c | |||
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op) | |||
69 | return 0; | 69 | return 0; |
70 | } | 70 | } |
71 | 71 | ||
72 | static struct of_device_id __initdata pmc_match[] = { | 72 | static struct of_device_id pmc_match[] = { |
73 | { | 73 | { |
74 | .name = PMC_OBPNAME, | 74 | .name = PMC_OBPNAME, |
75 | }, | 75 | }, |
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 91c10fb70858..850a1360c0d6 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c | |||
@@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE; | |||
53 | void __cpuinit smp_store_cpu_info(int id) | 53 | void __cpuinit smp_store_cpu_info(int id) |
54 | { | 54 | { |
55 | int cpu_node; | 55 | int cpu_node; |
56 | int mid; | ||
56 | 57 | ||
57 | cpu_data(id).udelay_val = loops_per_jiffy; | 58 | cpu_data(id).udelay_val = loops_per_jiffy; |
58 | 59 | ||
@@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id) | |||
60 | cpu_data(id).clock_tick = prom_getintdefault(cpu_node, | 61 | cpu_data(id).clock_tick = prom_getintdefault(cpu_node, |
61 | "clock-frequency", 0); | 62 | "clock-frequency", 0); |
62 | cpu_data(id).prom_node = cpu_node; | 63 | cpu_data(id).prom_node = cpu_node; |
63 | cpu_data(id).mid = cpu_get_hwmid(cpu_node); | 64 | mid = cpu_get_hwmid(cpu_node); |
64 | 65 | ||
65 | if (cpu_data(id).mid < 0) | 66 | if (mid < 0) { |
66 | panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); | 67 | printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node); |
68 | mid = 0; | ||
69 | } | ||
70 | cpu_data(id).mid = mid; | ||
67 | } | 71 | } |
68 | 72 | ||
69 | void __init smp_cpus_done(unsigned int max_cpus) | 73 | void __init smp_cpus_done(unsigned int max_cpus) |
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 4e236391b635..96046a4024c2 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c | |||
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op) | |||
168 | return 0; | 168 | return 0; |
169 | } | 169 | } |
170 | 170 | ||
171 | static struct of_device_id __initdata clock_match[] = { | 171 | static struct of_device_id clock_match[] = { |
172 | { | 172 | { |
173 | .name = "eeprom", | 173 | .name = "eeprom", |
174 | }, | 174 | }, |
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 3632cb34e914..0084c3361e15 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S | |||
@@ -289,10 +289,16 @@ cc_end_cruft: | |||
289 | 289 | ||
290 | /* Also, handle the alignment code out of band. */ | 290 | /* Also, handle the alignment code out of band. */ |
291 | cc_dword_align: | 291 | cc_dword_align: |
292 | cmp %g1, 6 | 292 | cmp %g1, 16 |
293 | bl,a ccte | 293 | bge 1f |
294 | srl %g1, 1, %o3 | ||
295 | 2: cmp %o3, 0 | ||
296 | be,a ccte | ||
294 | andcc %g1, 0xf, %o3 | 297 | andcc %g1, 0xf, %o3 |
295 | andcc %o0, 0x1, %g0 | 298 | andcc %o3, %o0, %g0 ! Check %o0 only (%o1 has the same last 2 bits) |
299 | be,a 2b | ||
300 | srl %o3, 1, %o3 | ||
301 | 1: andcc %o0, 0x1, %g0 | ||
296 | bne ccslow | 302 | bne ccslow |
297 | andcc %o0, 0x2, %g0 | 303 | andcc %o0, 0x2, %g0 |
298 | be 1f | 304 | be 1f |
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 6ea77979531c..42827cafa6af 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | #include <stdio.h> | 6 | #include <stdio.h> |
7 | #include <stdlib.h> | 7 | #include <stdlib.h> |
8 | #include <unistd.h> | ||
8 | #include <errno.h> | 9 | #include <errno.h> |
9 | #include <signal.h> | 10 | #include <signal.h> |
10 | #include <string.h> | 11 | #include <string.h> |
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len) | |||
75 | host.release, host.version, host.machine); | 76 | host.release, host.version, host.machine); |
76 | } | 77 | } |
77 | 78 | ||
79 | /* | ||
80 | * We cannot use glibc's abort(). It makes use of tgkill() which | ||
81 | * has no effect within UML's kernel threads. | ||
82 | * After that glibc would execute an invalid instruction to kill | ||
83 | * the calling process and UML crashes with SIGSEGV. | ||
84 | */ | ||
85 | static inline void __attribute__ ((noreturn)) uml_abort(void) | ||
86 | { | ||
87 | sigset_t sig; | ||
88 | |||
89 | fflush(NULL); | ||
90 | |||
91 | if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) | ||
92 | sigprocmask(SIG_UNBLOCK, &sig, 0); | ||
93 | |||
94 | for (;;) | ||
95 | if (kill(getpid(), SIGABRT) < 0) | ||
96 | exit(127); | ||
97 | } | ||
98 | |||
78 | void os_dump_core(void) | 99 | void os_dump_core(void) |
79 | { | 100 | { |
80 | int pid; | 101 | int pid; |
@@ -116,5 +137,5 @@ void os_dump_core(void) | |||
116 | while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) | 137 | while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) |
117 | os_kill_ptraced_process(pid, 0); | 138 | os_kill_ptraced_process(pid, 0); |
118 | 139 | ||
119 | abort(); | 140 | uml_abort(); |
120 | } | 141 | } |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988bacf3e..34595d5e1038 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -78,6 +78,7 @@ | |||
78 | #define APIC_DEST_LOGICAL 0x00800 | 78 | #define APIC_DEST_LOGICAL 0x00800 |
79 | #define APIC_DEST_PHYSICAL 0x00000 | 79 | #define APIC_DEST_PHYSICAL 0x00000 |
80 | #define APIC_DM_FIXED 0x00000 | 80 | #define APIC_DM_FIXED 0x00000 |
81 | #define APIC_DM_FIXED_MASK 0x00700 | ||
81 | #define APIC_DM_LOWEST 0x00100 | 82 | #define APIC_DM_LOWEST 0x00100 |
82 | #define APIC_DM_SMI 0x00200 | 83 | #define APIC_DM_SMI 0x00200 |
83 | #define APIC_DM_REMRD 0x00300 | 84 | #define APIC_DM_REMRD 0x00300 |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7db7723d1f32..d56187c6b838 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
299 | /* Install a pte for a particular vaddr in kernel space. */ | 299 | /* Install a pte for a particular vaddr in kernel space. */ |
300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); | 300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); |
301 | 301 | ||
302 | extern void native_pagetable_reserve(u64 start, u64 end); | ||
302 | #ifdef CONFIG_X86_32 | 303 | #ifdef CONFIG_X86_32 |
303 | extern void native_pagetable_setup_start(pgd_t *base); | 304 | extern void native_pagetable_setup_start(pgd_t *base); |
304 | extern void native_pagetable_setup_done(pgd_t *base); | 305 | extern void native_pagetable_setup_done(pgd_t *base); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
 */
@@ -124,7 +126,7 @@
 * The distribution specification (32 bytes) is interpreted as a 256-bit
 * distribution vector. Adjacent bits correspond to consecutive even numbered
 * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
 * destination nodeID associated with that specified bit.
 */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
	/* bits 20:6 */ /* first bit in uvhub map */
	unsigned int command:8;	/* message type */
	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short uvhub;
+	short pnode;
+};
 /*
 * one per-cpu; to locate the software tables
 */
@@ -399,10 +405,12 @@ struct bau_control {
	int baudisabled;
	int set_bau_off;
	short cpu;
+	short osnode;
	short uvhub_cpu;
	short uvhub;
	short cpus_in_socket;
	short cpus_in_uvhub;
+	short partition_base_pnode;
	unsigned short message_number;
	unsigned short uvhub_quiesce;
	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
	int congested_period;
	cycles_t period_time;
	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
				    int nbits)
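
The renamed comment and parameter above are the heart of this patch: distribution bits now index pnodes relative to the partition base, not absolute uvhub numbers. A toy model of setting a destination bit (assumed layout, kernel types reduced to plain C):

#include <limits.h>

#define UV_DISTRIBUTION_SIZE	256
#define LONG_BITS		(sizeof(unsigned long) * CHAR_BIT)

struct dist_vector {
	unsigned long bits[UV_DISTRIBUTION_SIZE / LONG_BITS];
};

/* record a destination pnode relative to the partition base pnode,
 * mirroring bau_uvhub_set(tpnode, ...) in the tlb_uv.c hunk below */
static void dist_set(struct dist_vector *v, int pnode, int base_pnode)
{
	int bit = pnode - base_pnode;

	v->bits[bit / LONG_BITS] |= 1UL << (bit % LONG_BITS);
}
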
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
	unsigned short	nr_online_cpus;
	unsigned short	pnode;
	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
 *
 * SGI UV MMR definitions
 *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
 */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
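
UVH_SCRATCH5 follows the file's usual idiom: a union overlays the raw 64-bit MMR value with named bitfields. A reduced sketch of reading a value through that pattern (standalone C; the 64-bit bitfield is a GCC-supported construct, and read_mmr in the comment is a stand-in for uv_read_local_mmr):

#include <stdint.h>

union scratch5 {
	uint64_t v;
	struct {
		uint64_t scratch5 : 64;	/* RW, W1CS */
	} s;
};

static uint64_t decode_scratch5(uint64_t raw)
{
	union scratch5 r;

	r.v = raw;		/* raw would come from read_mmr(UVH_SCRATCH5) */
	return r.s.scratch5;
}
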
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad8..d3d859035af9 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
 };
 
 /**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
 * struct x86_init_paging - platform specific paging functions
 * @pagetable_setup_start: platform specific pre paging_init() call
 * @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
	struct x86_init_mpparse	mpparse;
	struct x86_init_irqs	irqs;
	struct x86_init_oem	oem;
+	struct x86_init_mapping	mapping;
	struct x86_init_paging	paging;
	struct x86_init_timers	timers;
	struct x86_init_iommu	iommu;
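
The hook is a plain function pointer in a global ops table, so a platform overrides it by assignment at init time, before init_memory_mapping runs. A condensed, runnable sketch of the pattern (types and names reduced to essentials; the real override appears in the xen/mmu.c hunk at the end of this series):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct x86_init_mapping {
	void (*pagetable_reserve)(u64 start, u64 end);
};

static void native_reserve(u64 start, u64 end)
{
	printf("reserve [%llx, %llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
}

static void xen_like_reserve(u64 start, u64 end)
{
	native_reserve(start, end);	/* reserve the range used */
	printf("then mark [%llx, pgt_buf_top) RW\n", (unsigned long long)end);
}

static struct x86_init_mapping mapping = {
	.pagetable_reserve = native_reserve,	/* default, as in x86_init.c */
};

int main(void)
{
	mapping.pagetable_reserve = xen_like_reserve;	/* platform override */
	mapping.pagetable_reserve(0x100000, 0x200000);
	return 0;
}
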
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* The BMC sets a bit in this MMR before sending an NMI */
+#define UVH_NMI_MMR		UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR	(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK	(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
 */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
	if (reason != DIE_NMIUNKNOWN)
		return NOTIFY_OK;
 
	if (in_crash_kexec)
		/* do nothing if entering the crash kernel */
		return NOTIFY_OK;
+
	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
	 */
	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
	dump_stack();
	spin_unlock(&uv_nmi_lock);
 
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
+	.notifier_call	= uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
	BUG_ON(!uv_blade_info);
+
	for (blade = 0; blade < uv_num_possible_blades(); blade++)
		uv_blade_info[blade].memory_nid = -1;
 
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
			uv_blade_info[blade].pnode = pnode;
			uv_blade_info[blade].nr_possible_cpus = 0;
			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
			max_pnode = max(pnode, max_pnode);
			blade++;
		}
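
uv_handle_nmi above uses a classic double-checked pattern: a cheap unlocked read of the MMR, then a re-read under the per-blade lock so exactly one cpu bumps nmi_count and clears the pending bit, after which every cpu on the blade notices the count change. A userspace analogue of the claim step (pthreads standing in for the spinlock; all names here are illustrative):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t nmi_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long nmi_count;

static bool claim_pending(bool (*pending)(void), void (*clear)(void))
{
	if (!pending())			/* cheap unlocked check */
		return false;

	pthread_mutex_lock(&nmi_lock);
	if (pending()) {		/* re-check under the lock */
		nmi_count++;		/* every peer sees a new count */
		clear();		/* write-1-to-clear, done once */
	}
	pthread_mutex_unlock(&nmi_lock);
	return true;
}
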
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52dd..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
		set_cpu_cap(c, X86_FEATURE_ARAT);
 
	/*
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
 */
 
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
 
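
For readers decoding the second hunk: AMD_MODEL_RANGE packs a family plus a start and end model/stepping into one int, so the fix narrows erratum 400 on family 0xf to begin at model 0x41 stepping 2 instead of model 0x4. A sketch of a decoder under that assumed packing (field layout recalled from the era's asm/processor.h; treat it as illustrative, not authoritative):

#include <stdio.h>

/* assumed packing: family 31:24, start model 23:16, start stepping
 * 15:12, end model 11:4, end stepping 3:0 */
static void decode_range(unsigned int r)
{
	printf("family 0x%x, model/stepping 0x%02x/0x%x .. 0x%02x/0x%x\n",
	       r >> 24, (r >> 16) & 0xff, (r >> 12) & 0xf,
	       (r >> 4) & 0xff, r & 0xf);
}

int main(void)
{
	decode_range(0x0f412fff);	/* the new family-0xf range above */
	return 0;
}
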
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
	if (b) {
		kobject_put(&b->kobj);
+		list_del(&b->miscj);
		kfree(b);
	}
	return err;
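
The one-line fix addresses a classic error-path bug: the object was freed while still linked into the miscj list, leaving the list holding a dangling pointer. The safe ordering, as a self-contained sketch with a minimal doubly linked list:

#include <stdlib.h>

struct node {
	struct node *prev, *next;
};

static void node_unlink(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* error path: always unlink before freeing, mirroring the added
 * list_del(&b->miscj) ahead of kfree(b) */
static void destroy(struct node *n)
{
	node_unlink(n);
	free(n);
}
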
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..0f034460260d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
 
	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d1566..f1a6244d7d93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
					 struct pt_regs *regs)
 {
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
 
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;
 
-	preempt_disable();
+	local_irq_save(flags);
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
-	preempt_enable_no_resched();
+	local_irq_restore(flags);
 }
 
 static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128b..75ef4b18e9b7 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
		.banner			= default_banner,
	},
 
+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
	.paging = {
		.pagetable_setup_start	= native_pagetable_setup_start,
		.pagetable_setup_done	= native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039b..37b8b0fe8320 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
 struct map_range {
	unsigned long start;
	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
	__flush_tlb_all();
 
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));
 
	if (!after_bootmem)
		early_memtest(start, end);
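
The hunk also swaps the open-coded `pgt_buf_* << PAGE_SHIFT` conversions for PFN_PHYS, which matters because the new hook takes physical addresses, not frame numbers. The identity, as a compilable one-liner sketch:

#include <stdint.h>

#define PAGE_SHIFT	12
#define PFN_PHYS(pfn)	((uint64_t)(pfn) << PAGE_SHIFT)

/* e.g. page frame 0x100 starts at physical 0x100000 (1 MiB) */
static const uint64_t example = PFN_PHYS(0x100);
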
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
					  struct mm_struct *mm,
					  unsigned long va, unsigned int cpu)
 {
-	int tcpu;
-	int uvhub;
	int locals = 0;
	int remotes = 0;
	int hubs = 0;
+	int tcpu;
+	int tpnode;
	struct bau_desc *bau_desc;
	struct cpumask *flush_mask;
	struct ptc_stats *stat;
	struct bau_control *bcp;
	struct bau_control *tbcp;
+	struct hub_and_pnode *hpp;
 
	/* kernel was booted 'nobau' */
	if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
-	/* cpu statistics */
	for_each_cpu(tcpu, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(tcpu);
-		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		if (uvhub == bcp->uvhub)
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using an array stored
+		 * in local memory.
+		 */
+		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+		tpnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(tpnode, &bau_desc->distribution);
+		if (hpp->uvhub == bcp->uvhub)
			locals++;
		else
			remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 * an interrupt, but causes an error message to be returned to
 * the sender.
 */
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
 {
	int uvhub;
	int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
 }
 
 /*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
 */
 static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 {
	int i;
	int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode)
	n = pa >> uv_nshift;
	m = pa & uv_mmask;
 
+	/* the 14-bit pnode */
	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-
	/*
-	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
	 * cpu even though we only use the first one; one descriptor can
	 * describe a broadcast to 256 uv hubs.
	 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode)
		memset(bd2, 0, sizeof(struct bau_desc));
		bd2->header.sw_ack_flag = 1;
		/*
-		 * base_dest_nodeid is the nasid of the first uvhub
-		 * in the partition. The bit map will indicate uvhub numbers,
-		 * which are 0-N in a partition. Pnodes are unique system-wide.
+		 * The base_dest_nasid set in the message header is the nasid
+		 * of the first uvhub in the partition. The bit map will
+		 * indicate destination pnode numbers relative to that base.
+		 * They may not be consecutive if nasid striding is being used.
		 */
-		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
-		bd2->header.dest_subnodeid = 0x10; /* the LB */
+		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
		bd2->header.command = UV_NET_ENDPOINT_INTD;
		bd2->header.int_both = 1;
		/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
 /*
 * Initialization of each UV hub's structures
 */
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 {
	int node;
	int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 
	node = uvhub_to_first_node(uvhub);
	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode);
+	uv_activation_descriptor_init(node, pnode, base_pnode);
	uv_payload_queue_init(node, pnode);
	/*
-	 * the below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS
+	 * The below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS.
	 */
	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
 /*
 * initialize the bau_control structure for each cpu
 */
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 {
	int i;
	int cpu;
+	int tcpu;
	int pnode;
	int uvhub;
	int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
		bcp = &per_cpu(bau_control, cpu);
		memset(bcp, 0, sizeof(struct bau_control));
		pnode = uv_cpu_hub_info(cpu)->pnode;
+		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+			printk(KERN_EMERG
+				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+				cpu, pnode, base_part_pnode,
+				UV_DISTRIBUTION_SIZE);
+			return 1;
+		}
+		bcp->osnode = cpu_to_node(cpu);
+		bcp->partition_base_pnode = uv_partition_base_pnode;
		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
		bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
		bdp->pnode = pnode;
		/* kludge: 'assuming' one node per socket, and assuming that
		   disabling a socket just leaves a gap in node numbers */
-		socket = (cpu_to_node(cpu) & 1);
+		socket = bcp->osnode & 1;
		bdp->socket_mask |= (1 << socket);
		sdp = &bdp->socket[socket];
		sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 nextsocket:
			socket++;
			socket_mask = (socket_mask >> 1);
+			/* each socket gets a local array of pnodes/hubs */
+			bcp = smaster;
+			bcp->target_hub_and_pnode = kmalloc_node(
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+			memset(bcp->target_hub_and_pnode, 0,
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus());
+			for_each_present_cpu(tcpu) {
+				bcp->target_hub_and_pnode[tcpu].pnode =
+					uv_cpu_hub_info(tcpu)->pnode;
+				bcp->target_hub_and_pnode[tcpu].uvhub =
+					uv_cpu_hub_info(tcpu)->numa_blade_id;
+			}
		}
	}
	kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
	spin_lock_init(&disable_lock);
	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	if (uv_init_per_cpu(nuvhubs)) {
-		nobau = 1;
-		return 0;
-	}
-
	uv_partition_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
		if (uv_blade_nr_possible_cpus(uvhub) &&
		    (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+	}
+
+	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+		nobau = 1;
+		return 0;
+	}
 
	vector = UV_BAU_MESSAGE;
	for_each_possible_blade(uvhub)
		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector);
+			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
	uv_enable_timeouts();
	alloc_intr_gate(vector, uv_bau_message_intr1);
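
The last hunk's reordering is load-bearing: uv_partition_base_pnode must be computed before uv_init_per_cpu runs, because every bcp->partition_base_pnode copy and every distribution bit is relative to it. The scan itself is just a guarded minimum; a hypothetical standalone version:

static int min_base_pnode(const int *pnode, const int *possible_cpus,
			  int nblades)
{
	int base = 0x7fffffff;	/* matches the sentinel in uv_bau_init */
	int b;

	for (b = 0; b < nblades; b++)
		if (possible_cpus[b] && pnode[b] < base)
			base = pnode[b];
	return base;
}
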
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 55c965b38c27..0684f3c74d53 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set as RW the rest */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
 static void xen_post_allocator_init(void);
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
	return ret;
 }
 
-#ifdef CONFIG_X86_64
-static __initdata u64 __last_pgt_set_rw = 0;
-static __initdata u64 __pgt_buf_start = 0;
-static __initdata u64 __pgt_buf_end = 0;
-static __initdata u64 __pgt_buf_top = 0;
-/*
- * As a consequence of the commit:
- *
- * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
- * Author: Yinghai Lu <yinghai@kernel.org>
- * Date:   Fri Dec 17 16:58:28 2010 -0800
- *
- *     x86-64, mm: Put early page table high
- *
- * at some point init_memory_mapping is going to reach the pagetable pages
- * area and map those pages too (mapping them as normal memory that falls
- * in the range of addresses passed to init_memory_mapping as argument).
- * Some of those pages are already pagetable pages (they are in the range
- * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
- * everything is fine.
- * Some of these pages are not pagetable pages yet (they fall in the range
- * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
- * are going to be mapped RW. When these pages become pagetable pages and
- * are hooked into the pagetable, xen will find that the guest has already
- * a RW mapping of them somewhere and fail the operation.
- * The reason Xen requires pagetables to be RO is that the hypervisor needs
- * to verify that the pagetables are valid before using them. The validation
- * operations are called "pinning".
- *
- * In order to fix the issue we mark all the pages in the entire range
- * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
- * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
- * init_memory_mapping. Hence the kernel is going to crash as soon as one
- * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
- * ranges are RO).
- *
- * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
- * the init_memory_mapping has completed (in a perfect world we would
- * call this function from init_memory_mapping, but lets ignore that).
- *
- * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
- * end,top] have all changed to new values (b/c init_memory_mapping
- * is called and setting up another new page-table). Hence, the first time
- * we enter this function, we save away the pgt_buf_start value and update
- * the pgt_buf_[end,top].
- *
- * When we detect that the "old" pgt_buf_start through pgt_buf_end
- * PFNs have been reserved (so memblock_x86_reserve_range has been called),
- * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
- *
- * And then we update those "old" pgt_buf_[end|top] with the new ones
- * so that we can redo this on the next pagetable.
- */
-static __init void mark_rw_past_pgt(void) {
-
-	if (pgt_buf_end > pgt_buf_start) {
-		u64 addr, size;
-
-		/* Save it away. */
-		if (!__pgt_buf_start) {
-			__pgt_buf_start = pgt_buf_start;
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-		/* If we get the range that starts at __pgt_buf_end that means
-		 * the range is reserved, and that in 'init_memory_mapping'
-		 * the 'memblock_x86_reserve_range' has been called with the
-		 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
-		 * pgt_buf_[start|end|top] refer now to a new pagetable.
-		 * Note: we are called _after_ the pgt_buf_[..] have been
-		 * updated.*/
-
-		addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
-						       &size, PAGE_SIZE);
-
-		/* Still not reserved, meaning 'memblock_x86_reserve_range'
-		 * hasn't been called yet. Update the _end and _top.*/
-		if (addr == PFN_PHYS(__pgt_buf_start)) {
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-
-		/* OK, the area is reserved, meaning it is time for us to
-		 * set RW for the old end->top PFNs. */
-
-		/* ..unless we had already done this. */
-		if (__pgt_buf_end == __last_pgt_set_rw)
-			return;
-
-		addr = PFN_PHYS(__pgt_buf_end);
-
-		/* set as RW the rest */
-		printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
-			PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
-
-		while (addr < PFN_PHYS(__pgt_buf_top)) {
-			make_lowmem_page_readwrite(__va(addr));
-			addr += PAGE_SIZE;
-		}
-		/* And update everything so that we are ready for the next
-		 * pagetable (the one created for regions past 4GB) */
-		__last_pgt_set_rw = __pgt_buf_end;
-		__pgt_buf_start = pgt_buf_start;
-		__pgt_buf_end = pgt_buf_end;
-		__pgt_buf_top = pgt_buf_top;
-	}
-	return;
-}
-#else
-static __init void mark_rw_past_pgt(void) { }
-#endif
 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifdef CONFIG_X86_64
@@ -1602,14 +1503,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
	unsigned long pfn = pte_pfn(pte);
 
	/*
-	 * A bit of optimization. We do not need to call the workaround
-	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
-	 * That is b/c the pagetable at that point are just being populated
-	 * with empty values and we can save some cycles by not calling
-	 * the 'memblock' code.*/
-	if (pfn)
-		mark_rw_past_pgt();
-	/*
	 * If the new pfn is within the range of the newly allocated
	 * kernel pagetable, and it isn't being mapped into an
	 * early_ioremap fixmap slot as a freshly allocated page, make sure
@@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void)
 
 static __init void xen_post_allocator_init(void)
 {
-	mark_rw_past_pgt();
-
 #ifdef CONFIG_XEN_DEBUG
	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
 #endif
@@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 void __init xen_init_mmu_ops(void)
 {
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
	pv_mmu_ops = xen_mmu_ops;