aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig19
-rw-r--r--arch/alpha/boot/misc.c39
-rw-r--r--arch/arm/Kconfig8
-rw-r--r--arch/arm/boot/compressed/misc.c59
-rw-r--r--arch/arm/kernel/kprobes.c6
-rw-r--r--arch/arm/plat-omap/gpio.c3
-rw-r--r--arch/avr32/Kconfig2
-rw-r--r--arch/avr32/mach-at32ap/pio.c2
-rw-r--r--arch/cris/arch-v10/boot/compressed/misc.c36
-rw-r--r--arch/cris/arch-v32/boot/compressed/misc.c39
-rw-r--r--arch/h8300/Kconfig14
-rw-r--r--arch/h8300/boot/compressed/misc.c38
-rw-r--r--arch/ia64/kernel/entry.S6
-rw-r--r--arch/ia64/kernel/kprobes.c6
-rw-r--r--arch/m32r/boot/compressed/misc.c37
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/kernel/linux32.c1
-rw-r--r--arch/mn10300/boot/compressed/misc.c37
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/kernel/cputable.c11
-rw-r--r--arch/powerpc/kernel/entry_32.S6
-rw-r--r--arch/powerpc/kernel/iommu.c28
-rw-r--r--arch/powerpc/kernel/kprobes.c6
-rw-r--r--arch/powerpc/kernel/lparcfg.c386
-rw-r--r--arch/powerpc/kernel/process.c46
-rw-r--r--arch/powerpc/kernel/prom_init.c9
-rw-r--r--arch/powerpc/kernel/ptrace.c72
-rw-r--r--arch/powerpc/kernel/signal.c6
-rw-r--r--arch/powerpc/kernel/sysfs.c3
-rw-r--r--arch/powerpc/kernel/traps.c16
-rw-r--r--arch/powerpc/kernel/vio.c1033
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/mm/fault.c25
-rw-r--r--arch/powerpc/platforms/52xx/Kconfig4
-rw-r--r--arch/powerpc/platforms/cell/iommu.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c35
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.c3
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c3
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c3
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig23
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c468
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c42
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c71
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c3
-rw-r--r--arch/powerpc/sysdev/qe_lib/Kconfig2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/kernel/kprobes.c6
-rw-r--r--arch/s390/kernel/setup.c4
-rw-r--r--arch/s390/kernel/topology.c14
-rw-r--r--arch/sh/boot/compressed/misc_32.c38
-rw-r--r--arch/sh/boot/compressed/misc_64.c40
-rw-r--r--arch/sparc/Kconfig14
-rw-r--r--arch/sparc64/kernel/kprobes.c11
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/misc.c39
-rw-r--r--arch/x86/kernel/kprobes.c6
-rw-r--r--arch/x86/kernel/pci-calgary_64.c85
59 files changed, 2272 insertions, 677 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 6093c0be58b0..b0fabfa864ff 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,25 @@ config KPROBES
27 for kernel debugging, non-intrusive instrumentation and testing. 27 for kernel debugging, non-intrusive instrumentation and testing.
28 If in doubt, say "N". 28 If in doubt, say "N".
29 29
30config HAVE_EFFICIENT_UNALIGNED_ACCESS
31 def_bool n
32 help
33 Some architectures are unable to perform unaligned accesses
34 without the use of get_unaligned/put_unaligned. Others are
35 unable to perform such accesses efficiently (e.g. trap on
36 unaligned access and require fixing it up in the exception
37 handler.)
38
39 This symbol should be selected by an architecture if it can
40 perform unaligned accesses efficiently to allow different
41 code paths to be selected for these cases. Some network
42 drivers, for example, could opt to not fix up alignment
43 problems with received packets if doing so would not help
44 much.
45
46 See Documentation/unaligned-memory-access.txt for more
47 information on the topic of unaligned memory accesses.
48
30config KRETPROBES 49config KRETPROBES
31 def_bool y 50 def_bool y
32 depends on KPROBES && HAVE_KRETPROBES 51 depends on KPROBES && HAVE_KRETPROBES
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index c00646b25f6e..3047a1b3a517 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -78,8 +78,6 @@ static unsigned outcnt; /* bytes in output buffer */
78static int fill_inbuf(void); 78static int fill_inbuf(void);
79static void flush_window(void); 79static void flush_window(void);
80static void error(char *m); 80static void error(char *m);
81static void gzip_mark(void **);
82static void gzip_release(void **);
83 81
84static char *input_data; 82static char *input_data;
85static int input_data_size; 83static int input_data_size;
@@ -88,51 +86,18 @@ static uch *output_data;
88static ulg output_ptr; 86static ulg output_ptr;
89static ulg bytes_out; 87static ulg bytes_out;
90 88
91static void *malloc(int size);
92static void free(void *where);
93static void error(char *m); 89static void error(char *m);
94static void gzip_mark(void **); 90static void gzip_mark(void **);
95static void gzip_release(void **); 91static void gzip_release(void **);
96 92
97extern int end; 93extern int end;
98static ulg free_mem_ptr; 94static ulg free_mem_ptr;
99static ulg free_mem_ptr_end; 95static ulg free_mem_end_ptr;
100 96
101#define HEAP_SIZE 0x3000 97#define HEAP_SIZE 0x3000
102 98
103#include "../../../lib/inflate.c" 99#include "../../../lib/inflate.c"
104 100
105static void *malloc(int size)
106{
107 void *p;
108
109 if (size <0) error("Malloc error");
110 if (free_mem_ptr <= 0) error("Memory error");
111
112 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
113
114 p = (void *)free_mem_ptr;
115 free_mem_ptr += size;
116
117 if (free_mem_ptr >= free_mem_ptr_end)
118 error("Out of memory");
119 return p;
120}
121
122static void free(void *where)
123{ /* gzip_mark & gzip_release do the free */
124}
125
126static void gzip_mark(void **ptr)
127{
128 *ptr = (void *) free_mem_ptr;
129}
130
131static void gzip_release(void **ptr)
132{
133 free_mem_ptr = (long) *ptr;
134}
135
136/* =========================================================================== 101/* ===========================================================================
137 * Fill the input buffer. This is called only when the buffer is empty 102 * Fill the input buffer. This is called only when the buffer is empty
138 * and at least one byte is really needed. 103 * and at least one byte is really needed.
@@ -193,7 +158,7 @@ decompress_kernel(void *output_start,
193 158
194 /* FIXME FIXME FIXME */ 159 /* FIXME FIXME FIXME */
195 free_mem_ptr = (ulg)output_start + ksize; 160 free_mem_ptr = (ulg)output_start + ksize;
196 free_mem_ptr_end = (ulg)output_start + ksize + 0x200000; 161 free_mem_end_ptr = (ulg)output_start + ksize + 0x200000;
197 /* FIXME FIXME FIXME */ 162 /* FIXME FIXME FIXME */
198 163
199 /* put in temp area to reduce initial footprint */ 164 /* put in temp area to reduce initial footprint */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6fb4f03369f2..dabb015aa40b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -268,7 +268,7 @@ config ARCH_EP93XX
268 select GENERIC_GPIO 268 select GENERIC_GPIO
269 select HAVE_CLK 269 select HAVE_CLK
270 select HAVE_CLK 270 select HAVE_CLK
271 select HAVE_GPIO_LIB 271 select ARCH_REQUIRE_GPIOLIB
272 help 272 help
273 This enables support for the Cirrus EP93xx series of CPUs. 273 This enables support for the Cirrus EP93xx series of CPUs.
274 274
@@ -447,7 +447,7 @@ config ARCH_PXA
447 select ARCH_MTD_XIP 447 select ARCH_MTD_XIP
448 select GENERIC_GPIO 448 select GENERIC_GPIO
449 select HAVE_CLK 449 select HAVE_CLK
450 select HAVE_GPIO_LIB 450 select ARCH_REQUIRE_GPIOLIB
451 select GENERIC_TIME 451 select GENERIC_TIME
452 select GENERIC_CLOCKEVENTS 452 select GENERIC_CLOCKEVENTS
453 select TICK_ONESHOT 453 select TICK_ONESHOT
@@ -479,7 +479,7 @@ config ARCH_SA1100
479 select GENERIC_CLOCKEVENTS 479 select GENERIC_CLOCKEVENTS
480 select HAVE_CLK 480 select HAVE_CLK
481 select TICK_ONESHOT 481 select TICK_ONESHOT
482 select HAVE_GPIO_LIB 482 select ARCH_REQUIRE_GPIOLIB
483 help 483 help
484 Support for StrongARM 11x0 based boards. 484 Support for StrongARM 11x0 based boards.
485 485
@@ -522,7 +522,7 @@ config ARCH_OMAP
522 bool "TI OMAP" 522 bool "TI OMAP"
523 select GENERIC_GPIO 523 select GENERIC_GPIO
524 select HAVE_CLK 524 select HAVE_CLK
525 select HAVE_GPIO_LIB 525 select ARCH_REQUIRE_GPIOLIB
526 select GENERIC_TIME 526 select GENERIC_TIME
527 select GENERIC_CLOCKEVENTS 527 select GENERIC_CLOCKEVENTS
528 help 528 help
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 9b444022cb9b..7145cc7c04f0 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -217,8 +217,6 @@ static unsigned outcnt; /* bytes in output buffer */
217static int fill_inbuf(void); 217static int fill_inbuf(void);
218static void flush_window(void); 218static void flush_window(void);
219static void error(char *m); 219static void error(char *m);
220static void gzip_mark(void **);
221static void gzip_release(void **);
222 220
223extern char input_data[]; 221extern char input_data[];
224extern char input_data_end[]; 222extern char input_data_end[];
@@ -227,64 +225,21 @@ static uch *output_data;
227static ulg output_ptr; 225static ulg output_ptr;
228static ulg bytes_out; 226static ulg bytes_out;
229 227
230static void *malloc(int size);
231static void free(void *where);
232static void error(char *m); 228static void error(char *m);
233static void gzip_mark(void **);
234static void gzip_release(void **);
235 229
236static void putstr(const char *); 230static void putstr(const char *);
237 231
238extern int end; 232extern int end;
239static ulg free_mem_ptr; 233static ulg free_mem_ptr;
240static ulg free_mem_ptr_end; 234static ulg free_mem_end_ptr;
241 235
242#define HEAP_SIZE 0x3000 236#ifdef STANDALONE_DEBUG
243 237#define NO_INFLATE_MALLOC
244#include "../../../../lib/inflate.c" 238#endif
245
246#ifndef STANDALONE_DEBUG
247static void *malloc(int size)
248{
249 void *p;
250
251 if (size <0) error("Malloc error");
252 if (free_mem_ptr <= 0) error("Memory error");
253
254 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
255
256 p = (void *)free_mem_ptr;
257 free_mem_ptr += size;
258
259 if (free_mem_ptr >= free_mem_ptr_end)
260 error("Out of memory");
261 return p;
262}
263
264static void free(void *where)
265{ /* gzip_mark & gzip_release do the free */
266}
267
268static void gzip_mark(void **ptr)
269{
270 arch_decomp_wdog();
271 *ptr = (void *) free_mem_ptr;
272}
273 239
274static void gzip_release(void **ptr) 240#define ARCH_HAS_DECOMP_WDOG
275{
276 arch_decomp_wdog();
277 free_mem_ptr = (long) *ptr;
278}
279#else
280static void gzip_mark(void **ptr)
281{
282}
283 241
284static void gzip_release(void **ptr) 242#include "../../../../lib/inflate.c"
285{
286}
287#endif
288 243
289/* =========================================================================== 244/* ===========================================================================
290 * Fill the input buffer. This is called only when the buffer is empty 245 * Fill the input buffer. This is called only when the buffer is empty
@@ -348,7 +303,7 @@ decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
348{ 303{
349 output_data = (uch *)output_start; /* Points to kernel start */ 304 output_data = (uch *)output_start; /* Points to kernel start */
350 free_mem_ptr = free_mem_ptr_p; 305 free_mem_ptr = free_mem_ptr_p;
351 free_mem_ptr_end = free_mem_ptr_end_p; 306 free_mem_end_ptr = free_mem_ptr_end_p;
352 __machine_arch_type = arch_id; 307 __machine_arch_type = arch_id;
353 308
354 arch_decomp_setup(); 309 arch_decomp_setup();
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e10c8d1..d28513f14d05 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
297 297
298 INIT_HLIST_HEAD(&empty_rp); 298 INIT_HLIST_HEAD(&empty_rp);
299 spin_lock_irqsave(&kretprobe_lock, flags); 299 kretprobe_hash_lock(current, &head, &flags);
300 head = kretprobe_inst_table_head(current);
301 300
302 /* 301 /*
303 * It is possible to have multiple instances associated with a given 302 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
337 } 336 }
338 337
339 kretprobe_assert(ri, orig_ret_address, trampoline_address); 338 kretprobe_assert(ri, orig_ret_address, trampoline_address);
340 spin_unlock_irqrestore(&kretprobe_lock, flags); 339 kretprobe_hash_unlock(current, &flags);
341 340
342 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 341 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
343 hlist_del(&ri->hlist); 342 hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
347 return (void *)orig_ret_address; 346 return (void *)orig_ret_address;
348} 347}
349 348
350/* Called with kretprobe_lock held. */
351void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 349void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
352 struct pt_regs *regs) 350 struct pt_regs *regs)
353{ 351{
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 1903a3491ee9..d8e9c2c3f0f6 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void)
1488 bank->chip.set = gpio_set; 1488 bank->chip.set = gpio_set;
1489 if (bank_is_mpuio(bank)) { 1489 if (bank_is_mpuio(bank)) {
1490 bank->chip.label = "mpuio"; 1490 bank->chip.label = "mpuio";
1491#ifdef CONFIG_ARCH_OMAP1
1492 bank->chip.dev = &omap_mpuio_device.dev;
1493#endif
1491 bank->chip.base = OMAP_MPUIO(0); 1494 bank->chip.base = OMAP_MPUIO(0);
1492 } else { 1495 } else {
1493 bank->chip.label = "gpio"; 1496 bank->chip.label = "gpio";
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index df4adefedb42..7c239a916275 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -88,7 +88,7 @@ config PLATFORM_AT32AP
88 select SUBARCH_AVR32B 88 select SUBARCH_AVR32B
89 select MMU 89 select MMU
90 select PERFORMANCE_COUNTERS 90 select PERFORMANCE_COUNTERS
91 select HAVE_GPIO_LIB 91 select ARCH_REQUIRE_GPIOLIB
92 select GENERIC_ALLOCATOR 92 select GENERIC_ALLOCATOR
93 93
94# 94#
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 60da03ba7117..296294f8ed81 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev)
360 pio->chip.label = pio->name; 360 pio->chip.label = pio->name;
361 pio->chip.base = pdev->id * 32; 361 pio->chip.base = pdev->id * 32;
362 pio->chip.ngpio = 32; 362 pio->chip.ngpio = 32;
363 pio->chip.dev = &pdev->dev;
364 pio->chip.owner = THIS_MODULE;
363 365
364 pio->chip.direction_input = direction_input; 366 pio->chip.direction_input = direction_input;
365 pio->chip.get = gpio_get; 367 pio->chip.get = gpio_get;
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 18e13bce1400..d933c89889db 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -102,50 +102,16 @@ extern char *input_data; /* lives in head.S */
102static long bytes_out = 0; 102static long bytes_out = 0;
103static uch *output_data; 103static uch *output_data;
104static unsigned long output_ptr = 0; 104static unsigned long output_ptr = 0;
105
106static void *malloc(int size);
107static void free(void *where);
108static void gzip_mark(void **);
109static void gzip_release(void **);
110
111static void puts(const char *); 105static void puts(const char *);
112 106
113/* the "heap" is put directly after the BSS ends, at end */ 107/* the "heap" is put directly after the BSS ends, at end */
114 108
115extern int _end; 109extern int _end;
116static long free_mem_ptr = (long)&_end; 110static long free_mem_ptr = (long)&_end;
111static long free_mem_end_ptr;
117 112
118#include "../../../../../lib/inflate.c" 113#include "../../../../../lib/inflate.c"
119 114
120static void *malloc(int size)
121{
122 void *p;
123
124 if (size < 0)
125 error("Malloc error");
126
127 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
128
129 p = (void *)free_mem_ptr;
130 free_mem_ptr += size;
131
132 return p;
133}
134
135static void free(void *where)
136{ /* Don't care */
137}
138
139static void gzip_mark(void **ptr)
140{
141 *ptr = (void *) free_mem_ptr;
142}
143
144static void gzip_release(void **ptr)
145{
146 free_mem_ptr = (long) *ptr;
147}
148
149/* decompressor info and error messages to serial console */ 115/* decompressor info and error messages to serial console */
150 116
151static void 117static void
diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c
index 55b2695c5d70..3595e16e82bc 100644
--- a/arch/cris/arch-v32/boot/compressed/misc.c
+++ b/arch/cris/arch-v32/boot/compressed/misc.c
@@ -89,20 +89,14 @@ static unsigned outcnt = 0; /* bytes in output buffer */
89 89
90static void flush_window(void); 90static void flush_window(void);
91static void error(char *m); 91static void error(char *m);
92static void gzip_mark(void **);
93static void gzip_release(void **);
94 92
95extern char *input_data; /* lives in head.S */ 93extern char *input_data; /* lives in head.S */
96 94
97static long bytes_out = 0; 95static long bytes_out;
98static uch *output_data; 96static uch *output_data;
99static unsigned long output_ptr = 0; 97static unsigned long output_ptr;
100 98
101static void *malloc(int size);
102static void free(void *where);
103static void error(char *m); 99static void error(char *m);
104static void gzip_mark(void **);
105static void gzip_release(void **);
106 100
107static void puts(const char *); 101static void puts(const char *);
108 102
@@ -110,37 +104,10 @@ static void puts(const char *);
110 104
111extern int _end; 105extern int _end;
112static long free_mem_ptr = (long)&_end; 106static long free_mem_ptr = (long)&_end;
107static long free_mem_end_ptr;
113 108
114#include "../../../../../lib/inflate.c" 109#include "../../../../../lib/inflate.c"
115 110
116static void *malloc(int size)
117{
118 void *p;
119
120 if (size <0) error("Malloc error");
121
122 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
123
124 p = (void *)free_mem_ptr;
125 free_mem_ptr += size;
126
127 return p;
128}
129
130static void free(void *where)
131{ /* Don't care */
132}
133
134static void gzip_mark(void **ptr)
135{
136 *ptr = (void *) free_mem_ptr;
137}
138
139static void gzip_release(void **ptr)
140{
141 free_mem_ptr = (long) *ptr;
142}
143
144/* decompressor info and error messages to serial console */ 111/* decompressor info and error messages to serial console */
145 112
146static inline void 113static inline void
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 085dc6ec152b..396ab059efa3 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -203,20 +203,6 @@ config UNIX98_PTYS
203 Read the instructions in <file:Documentation/Changes> pertaining to 203 Read the instructions in <file:Documentation/Changes> pertaining to
204 pseudo terminals. It's safe to say N. 204 pseudo terminals. It's safe to say N.
205 205
206config UNIX98_PTY_COUNT
207 int "Maximum number of Unix98 PTYs in use (0-2048)"
208 depends on UNIX98_PTYS
209 default "256"
210 help
211 The maximum number of Unix98 PTYs that can be used at any one time.
212 The default is 256, and should be enough for desktop systems. Server
213 machines which support incoming telnet/rlogin/ssh connections and/or
214 serve several X terminals may want to increase this: every incoming
215 connection and every xterm uses up one PTY.
216
217 When not in use, each additional set of 256 PTYs occupy
218 approximately 8 KB of kernel memory on 32-bit architectures.
219
220source "drivers/char/pcmcia/Kconfig" 206source "drivers/char/pcmcia/Kconfig"
221 207
222source "drivers/serial/Kconfig" 208source "drivers/serial/Kconfig"
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 845074588af0..51ab6cbd030f 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -67,8 +67,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
67static int fill_inbuf(void); 67static int fill_inbuf(void);
68static void flush_window(void); 68static void flush_window(void);
69static void error(char *m); 69static void error(char *m);
70static void gzip_mark(void **);
71static void gzip_release(void **);
72 70
73extern char input_data[]; 71extern char input_data[];
74extern int input_len; 72extern int input_len;
@@ -77,11 +75,7 @@ static long bytes_out = 0;
77static uch *output_data; 75static uch *output_data;
78static unsigned long output_ptr = 0; 76static unsigned long output_ptr = 0;
79 77
80static void *malloc(int size);
81static void free(void *where);
82static void error(char *m); 78static void error(char *m);
83static void gzip_mark(void **);
84static void gzip_release(void **);
85 79
86int puts(const char *); 80int puts(const char *);
87 81
@@ -98,38 +92,6 @@ static unsigned long free_mem_end_ptr;
98#define TDR *((volatile unsigned char *)0xffff8b) 92#define TDR *((volatile unsigned char *)0xffff8b)
99#define SSR *((volatile unsigned char *)0xffff8c) 93#define SSR *((volatile unsigned char *)0xffff8c)
100 94
101static void *malloc(int size)
102{
103 void *p;
104
105 if (size <0) error("Malloc error");
106 if (free_mem_ptr == 0) error("Memory error");
107
108 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
109
110 p = (void *)free_mem_ptr;
111 free_mem_ptr += size;
112
113 if (free_mem_ptr >= free_mem_end_ptr)
114 error("Out of memory");
115
116 return p;
117}
118
119static void free(void *where)
120{ /* Don't care */
121}
122
123static void gzip_mark(void **ptr)
124{
125 *ptr = (void *) free_mem_ptr;
126}
127
128static void gzip_release(void **ptr)
129{
130 free_mem_ptr = (long) *ptr;
131}
132
133int puts(const char *s) 95int puts(const char *s)
134{ 96{
135 return 0; 97 return 0;
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 56ab156c48ae..0dd6c1419d8d 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1691,6 +1691,12 @@ sys_call_table:
1691 data8 sys_timerfd_create // 1310 1691 data8 sys_timerfd_create // 1310
1692 data8 sys_timerfd_settime 1692 data8 sys_timerfd_settime
1693 data8 sys_timerfd_gettime 1693 data8 sys_timerfd_gettime
1694 data8 sys_signalfd4
1695 data8 sys_eventfd2
1696 data8 sys_epoll_create1 // 1315
1697 data8 sys_dup3
1698 data8 sys_pipe2
1699 data8 sys_inotify_init1
1694 1700
1695 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls 1701 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
1696#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ 1702#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f4f88f..f07688da947c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
429 ((struct fnptr *)kretprobe_trampoline)->ip; 429 ((struct fnptr *)kretprobe_trampoline)->ip;
430 430
431 INIT_HLIST_HEAD(&empty_rp); 431 INIT_HLIST_HEAD(&empty_rp);
432 spin_lock_irqsave(&kretprobe_lock, flags); 432 kretprobe_hash_lock(current, &head, &flags);
433 head = kretprobe_inst_table_head(current);
434 433
435 /* 434 /*
436 * It is possible to have multiple instances associated with a given 435 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
485 kretprobe_assert(ri, orig_ret_address, trampoline_address); 484 kretprobe_assert(ri, orig_ret_address, trampoline_address);
486 485
487 reset_current_kprobe(); 486 reset_current_kprobe();
488 spin_unlock_irqrestore(&kretprobe_lock, flags); 487 kretprobe_hash_unlock(current, &flags);
489 preempt_enable_no_resched(); 488 preempt_enable_no_resched();
490 489
491 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 490 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
500 return 1; 499 return 1;
501} 500}
502 501
503/* Called with kretprobe_lock held */
504void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 502void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
505 struct pt_regs *regs) 503 struct pt_regs *regs)
506{ 504{
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 600d40e33495..d394292498c0 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -70,8 +70,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
70static int fill_inbuf(void); 70static int fill_inbuf(void);
71static void flush_window(void); 71static void flush_window(void);
72static void error(char *m); 72static void error(char *m);
73static void gzip_mark(void **);
74static void gzip_release(void **);
75 73
76static unsigned char *input_data; 74static unsigned char *input_data;
77static int input_len; 75static int input_len;
@@ -82,9 +80,6 @@ static unsigned long output_ptr = 0;
82 80
83#include "m32r_sio.c" 81#include "m32r_sio.c"
84 82
85static void *malloc(int size);
86static void free(void *where);
87
88static unsigned long free_mem_ptr; 83static unsigned long free_mem_ptr;
89static unsigned long free_mem_end_ptr; 84static unsigned long free_mem_end_ptr;
90 85
@@ -92,38 +87,6 @@ static unsigned long free_mem_end_ptr;
92 87
93#include "../../../../lib/inflate.c" 88#include "../../../../lib/inflate.c"
94 89
95static void *malloc(int size)
96{
97 void *p;
98
99 if (size <0) error("Malloc error");
100 if (free_mem_ptr == 0) error("Memory error");
101
102 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
103
104 p = (void *)free_mem_ptr;
105 free_mem_ptr += size;
106
107 if (free_mem_ptr >= free_mem_end_ptr)
108 error("Out of memory");
109
110 return p;
111}
112
113static void free(void *where)
114{ /* Don't care */
115}
116
117static void gzip_mark(void **ptr)
118{
119 *ptr = (void *) free_mem_ptr;
120}
121
122static void gzip_release(void **ptr)
123{
124 free_mem_ptr = (long) *ptr;
125}
126
127void* memset(void* s, int c, size_t n) 90void* memset(void* s, int c, size_t n)
128{ 91{
129 int i; 92 int i;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b9c754f4070c..b4c4eaa5dd26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -713,7 +713,7 @@ config CSRC_SB1250
713 713
714config GPIO_TXX9 714config GPIO_TXX9
715 select GENERIC_GPIO 715 select GENERIC_GPIO
716 select HAVE_GPIO_LIB 716 select ARCH_REQUIRE_GPIOLIB
717 bool 717 bool
718 718
719config CFE 719config CFE
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index c266211ed653..2fefb14414b7 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/highuid.h> 13#include <linux/highuid.h>
14#include <linux/dirent.h>
15#include <linux/resource.h> 14#include <linux/resource.h>
16#include <linux/highmem.h> 15#include <linux/highmem.h>
17#include <linux/time.h> 16#include <linux/time.h>
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c
index ded207efc97a..f673383518e4 100644
--- a/arch/mn10300/boot/compressed/misc.c
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -153,26 +153,9 @@ static uch *output_data;
153static unsigned long output_ptr; 153static unsigned long output_ptr;
154 154
155 155
156static void *malloc(int size);
157
158static inline void free(void *where)
159{ /* Don't care */
160}
161
162static unsigned long free_mem_ptr = (unsigned long) &end; 156static unsigned long free_mem_ptr = (unsigned long) &end;
163static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000; 157static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
164 158
165static inline void gzip_mark(void **ptr)
166{
167 kputs(".");
168 *ptr = (void *) free_mem_ptr;
169}
170
171static inline void gzip_release(void **ptr)
172{
173 free_mem_ptr = (unsigned long) *ptr;
174}
175
176#define INPLACE_MOVE_ROUTINE 0x1000 159#define INPLACE_MOVE_ROUTINE 0x1000
177#define LOW_BUFFER_START 0x2000 160#define LOW_BUFFER_START 0x2000
178#define LOW_BUFFER_END 0x90000 161#define LOW_BUFFER_END 0x90000
@@ -186,26 +169,6 @@ static int lines, cols;
186 169
187#include "../../../../lib/inflate.c" 170#include "../../../../lib/inflate.c"
188 171
189static void *malloc(int size)
190{
191 void *p;
192
193 if (size < 0)
194 error("Malloc error\n");
195 if (!free_mem_ptr)
196 error("Memory error\n");
197
198 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
199
200 p = (void *) free_mem_ptr;
201 free_mem_ptr += size;
202
203 if (free_mem_ptr >= free_mem_end_ptr)
204 error("\nOut of memory\n");
205
206 return p;
207}
208
209static inline void scroll(void) 172static inline void scroll(void)
210{ 173{
211 int i; 174 int i;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a487671c282f..fe88418167c5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,8 +110,10 @@ config PPC
110 default y 110 default y
111 select HAVE_DYNAMIC_FTRACE 111 select HAVE_DYNAMIC_FTRACE
112 select HAVE_FTRACE 112 select HAVE_FTRACE
113 select ARCH_WANT_OPTIONAL_GPIOLIB
113 select HAVE_IDE 114 select HAVE_IDE
114 select HAVE_IOREMAP_PROT 115 select HAVE_IOREMAP_PROT
116 select HAVE_EFFICIENT_UNALIGNED_ACCESS
115 select HAVE_KPROBES 117 select HAVE_KPROBES
116 select HAVE_ARCH_KGDB 118 select HAVE_ARCH_KGDB
117 select HAVE_KRETPROBES 119 select HAVE_KRETPROBES
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b936a1dd0a50..25a052c16754 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
23struct cpu_spec* cur_cpu_spec = NULL; 23struct cpu_spec* cur_cpu_spec = NULL;
24EXPORT_SYMBOL(cur_cpu_spec); 24EXPORT_SYMBOL(cur_cpu_spec);
25 25
26/* The platform string corresponding to the real PVR */
27const char *powerpc_base_platform;
28
26/* NOTE: 29/* NOTE:
27 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's 30 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
28 * the responsibility of the appropriate CPU save/restore functions to 31 * the responsibility of the appropriate CPU save/restore functions to
@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
1652 } else 1655 } else
1653 *t = *s; 1656 *t = *s;
1654 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; 1657 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
1658
1659 /*
1660 * Set the base platform string once; assumes
1661 * we're called with real pvr first.
1662 */
1663 if (powerpc_base_platform == NULL)
1664 powerpc_base_platform = t->platform;
1665
1655#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) 1666#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
1656 /* ppc64 and booke expect identify_cpu to also call 1667 /* ppc64 and booke expect identify_cpu to also call
1657 * setup_cpu for that processor. I will consolidate 1668 * setup_cpu for that processor. I will consolidate
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index da52269aec1e..81c8324a4a3c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -148,7 +148,7 @@ transfer_to_handler:
148 /* Check to see if the dbcr0 register is set up to debug. Use the 148 /* Check to see if the dbcr0 register is set up to debug. Use the
149 internal debug mode bit to do this. */ 149 internal debug mode bit to do this. */
150 lwz r12,THREAD_DBCR0(r12) 150 lwz r12,THREAD_DBCR0(r12)
151 andis. r12,r12,DBCR0_IDM@h 151 andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
152 beq+ 3f 152 beq+ 3f
153 /* From user and task is ptraced - load up global dbcr0 */ 153 /* From user and task is ptraced - load up global dbcr0 */
154 li r12,-1 /* clear all pending debug events */ 154 li r12,-1 /* clear all pending debug events */
@@ -292,7 +292,7 @@ syscall_exit_cont:
292 /* If the process has its own DBCR0 value, load it up. The internal 292 /* If the process has its own DBCR0 value, load it up. The internal
293 debug mode bit tells us that dbcr0 should be loaded. */ 293 debug mode bit tells us that dbcr0 should be loaded. */
294 lwz r0,THREAD+THREAD_DBCR0(r2) 294 lwz r0,THREAD+THREAD_DBCR0(r2)
295 andis. r10,r0,DBCR0_IDM@h 295 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
296 bnel- load_dbcr0 296 bnel- load_dbcr0
297#endif 297#endif
298#ifdef CONFIG_44x 298#ifdef CONFIG_44x
@@ -720,7 +720,7 @@ restore_user:
720 /* Check whether this process has its own DBCR0 value. The internal 720 /* Check whether this process has its own DBCR0 value. The internal
721 debug mode bit tells us that dbcr0 should be loaded. */ 721 debug mode bit tells us that dbcr0 should be loaded. */
722 lwz r0,THREAD+THREAD_DBCR0(r2) 722 lwz r0,THREAD+THREAD_DBCR0(r2)
723 andis. r10,r0,DBCR0_IDM@h 723 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
724 bnel- load_dbcr0 724 bnel- load_dbcr0
725#endif 725#endif
726 726
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2385f68c1751..550a19399bfa 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
49 49
50static int protect4gb = 1; 50static int protect4gb = 1;
51 51
52static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
53
52static inline unsigned long iommu_num_pages(unsigned long vaddr, 54static inline unsigned long iommu_num_pages(unsigned long vaddr,
53 unsigned long slen) 55 unsigned long slen)
54{ 56{
@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
191{ 193{
192 unsigned long entry, flags; 194 unsigned long entry, flags;
193 dma_addr_t ret = DMA_ERROR_CODE; 195 dma_addr_t ret = DMA_ERROR_CODE;
196 int build_fail;
194 197
195 spin_lock_irqsave(&(tbl->it_lock), flags); 198 spin_lock_irqsave(&(tbl->it_lock), flags);
196 199
@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
205 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ 208 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
206 209
207 /* Put the TCEs in the HW table */ 210 /* Put the TCEs in the HW table */
208 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, 211 build_fail = ppc_md.tce_build(tbl, entry, npages,
209 direction, attrs); 212 (unsigned long)page & IOMMU_PAGE_MASK,
213 direction, attrs);
214
215 /* ppc_md.tce_build() only returns non-zero for transient errors.
216 * Clean up the table bitmap in this case and return
217 * DMA_ERROR_CODE. For all other errors the functionality is
218 * not altered.
219 */
220 if (unlikely(build_fail)) {
221 __iommu_free(tbl, ret, npages);
210 222
223 spin_unlock_irqrestore(&(tbl->it_lock), flags);
224 return DMA_ERROR_CODE;
225 }
211 226
212 /* Flush/invalidate TLB caches if necessary */ 227 /* Flush/invalidate TLB caches if necessary */
213 if (ppc_md.tce_flush) 228 if (ppc_md.tce_flush)
@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
276 dma_addr_t dma_next = 0, dma_addr; 291 dma_addr_t dma_next = 0, dma_addr;
277 unsigned long flags; 292 unsigned long flags;
278 struct scatterlist *s, *outs, *segstart; 293 struct scatterlist *s, *outs, *segstart;
279 int outcount, incount, i; 294 int outcount, incount, i, build_fail = 0;
280 unsigned int align; 295 unsigned int align;
281 unsigned long handle; 296 unsigned long handle;
282 unsigned int max_seg_size; 297 unsigned int max_seg_size;
@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
337 npages, entry, dma_addr); 352 npages, entry, dma_addr);
338 353
339 /* Insert into HW table */ 354 /* Insert into HW table */
340 ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, 355 build_fail = ppc_md.tce_build(tbl, entry, npages,
341 direction, attrs); 356 vaddr & IOMMU_PAGE_MASK,
357 direction, attrs);
358 if(unlikely(build_fail))
359 goto failure;
342 360
343 /* If we are in an open segment, try merging */ 361 /* If we are in an open segment, try merging */
344 if (segstart != s) { 362 if (segstart != s) {
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af125450..de79915452c8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
144 kcb->kprobe_saved_msr = regs->msr; 144 kcb->kprobe_saved_msr = regs->msr;
145} 145}
146 146
147/* Called with kretprobe_lock held */
148void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 147void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
149 struct pt_regs *regs) 148 struct pt_regs *regs)
150{ 149{
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
312 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 311 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
313 312
314 INIT_HLIST_HEAD(&empty_rp); 313 INIT_HLIST_HEAD(&empty_rp);
315 spin_lock_irqsave(&kretprobe_lock, flags); 314 kretprobe_hash_lock(current, &head, &flags);
316 head = kretprobe_inst_table_head(current);
317 315
318 /* 316 /*
319 * It is possible to have multiple instances associated with a given 317 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
352 regs->nip = orig_ret_address; 350 regs->nip = orig_ret_address;
353 351
354 reset_current_kprobe(); 352 reset_current_kprobe();
355 spin_unlock_irqrestore(&kretprobe_lock, flags); 353 kretprobe_hash_unlock(current, &flags);
356 preempt_enable_no_resched(); 354 preempt_enable_no_resched();
357 355
358 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 356 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 827a5726a035..9f856a0c3e38 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,8 +34,9 @@
34#include <asm/time.h> 34#include <asm/time.h>
35#include <asm/prom.h> 35#include <asm/prom.h>
36#include <asm/vdso_datapage.h> 36#include <asm/vdso_datapage.h>
37#include <asm/vio.h>
37 38
38#define MODULE_VERS "1.7" 39#define MODULE_VERS "1.8"
39#define MODULE_NAME "lparcfg" 40#define MODULE_NAME "lparcfg"
40 41
41/* #define LPARCFG_DEBUG */ 42/* #define LPARCFG_DEBUG */
@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
129/* 130/*
130 * Methods used to fetch LPAR data when running on a pSeries platform. 131 * Methods used to fetch LPAR data when running on a pSeries platform.
131 */ 132 */
132static void log_plpar_hcall_return(unsigned long rc, char *tag) 133/**
134 * h_get_mpp
135 * H_GET_MPP hcall returns info in 7 parms
136 */
137int h_get_mpp(struct hvcall_mpp_data *mpp_data)
133{ 138{
134 switch(rc) { 139 int rc;
135 case 0: 140 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
136 return; 141
137 case H_HARDWARE: 142 rc = plpar_hcall9(H_GET_MPP, retbuf);
138 printk(KERN_INFO "plpar-hcall (%s) " 143
139 "Hardware fault\n", tag); 144 mpp_data->entitled_mem = retbuf[0];
140 return; 145 mpp_data->mapped_mem = retbuf[1];
141 case H_FUNCTION: 146
142 printk(KERN_INFO "plpar-hcall (%s) " 147 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
143 "Function not allowed\n", tag); 148 mpp_data->pool_num = retbuf[2] & 0xffff;
144 return; 149
145 case H_AUTHORITY: 150 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
146 printk(KERN_INFO "plpar-hcall (%s) " 151 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
147 "Not authorized to this function\n", tag); 152 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
148 return; 153
149 case H_PARAMETER: 154 mpp_data->pool_size = retbuf[4];
150 printk(KERN_INFO "plpar-hcall (%s) " 155 mpp_data->loan_request = retbuf[5];
151 "Bad parameter(s)\n",tag); 156 mpp_data->backing_mem = retbuf[6];
152 return; 157
153 default: 158 return rc;
154 printk(KERN_INFO "plpar-hcall (%s) "
155 "Unexpected rc(0x%lx)\n", tag, rc);
156 }
157} 159}
160EXPORT_SYMBOL(h_get_mpp);
161
162struct hvcall_ppp_data {
163 u64 entitlement;
164 u64 unallocated_entitlement;
165 u16 group_num;
166 u16 pool_num;
167 u8 capped;
168 u8 weight;
169 u8 unallocated_weight;
170 u16 active_procs_in_pool;
171 u16 active_system_procs;
172};
158 173
159/* 174/*
160 * H_GET_PPP hcall returns info in 4 parms. 175 * H_GET_PPP hcall returns info in 4 parms.
@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
176 * XXXX - Active processors in Physical Processor Pool. 191 * XXXX - Active processors in Physical Processor Pool.
177 * XXXX - Processors active on platform. 192 * XXXX - Processors active on platform.
178 */ 193 */
179static unsigned int h_get_ppp(unsigned long *entitled, 194static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
180 unsigned long *unallocated,
181 unsigned long *aggregation,
182 unsigned long *resource)
183{ 195{
184 unsigned long rc; 196 unsigned long rc;
185 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 197 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
186 198
187 rc = plpar_hcall(H_GET_PPP, retbuf); 199 rc = plpar_hcall(H_GET_PPP, retbuf);
188 200
189 *entitled = retbuf[0]; 201 ppp_data->entitlement = retbuf[0];
190 *unallocated = retbuf[1]; 202 ppp_data->unallocated_entitlement = retbuf[1];
191 *aggregation = retbuf[2]; 203
192 *resource = retbuf[3]; 204 ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
205 ppp_data->pool_num = retbuf[2] & 0xffff;
193 206
194 log_plpar_hcall_return(rc, "H_GET_PPP"); 207 ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
208 ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
209 ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
210 ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
211 ppp_data->active_system_procs = retbuf[3] & 0xffff;
195 212
196 return rc; 213 return rc;
197} 214}
198 215
199static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) 216static unsigned h_pic(unsigned long *pool_idle_time,
217 unsigned long *num_procs)
200{ 218{
201 unsigned long rc; 219 unsigned long rc;
202 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 220 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
206 *pool_idle_time = retbuf[0]; 224 *pool_idle_time = retbuf[0];
207 *num_procs = retbuf[1]; 225 *num_procs = retbuf[1];
208 226
209 if (rc != H_AUTHORITY) 227 return rc;
210 log_plpar_hcall_return(rc, "H_PIC"); 228}
229
230/*
231 * parse_ppp_data
232 * Parse out the data returned from h_get_ppp and h_pic
233 */
234static void parse_ppp_data(struct seq_file *m)
235{
236 struct hvcall_ppp_data ppp_data;
237 int rc;
238
239 rc = h_get_ppp(&ppp_data);
240 if (rc)
241 return;
242
243 seq_printf(m, "partition_entitled_capacity=%ld\n",
244 ppp_data.entitlement);
245 seq_printf(m, "group=%d\n", ppp_data.group_num);
246 seq_printf(m, "system_active_processors=%d\n",
247 ppp_data.active_system_procs);
248
249 /* pool related entries are apropriate for shared configs */
250 if (lppaca[0].shared_proc) {
251 unsigned long pool_idle_time, pool_procs;
252
253 seq_printf(m, "pool=%d\n", ppp_data.pool_num);
254
255 /* report pool_capacity in percentage */
256 seq_printf(m, "pool_capacity=%d\n",
257 ppp_data.active_procs_in_pool * 100);
258
259 h_pic(&pool_idle_time, &pool_procs);
260 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
261 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
262 }
263
264 seq_printf(m, "unallocated_capacity_weight=%d\n",
265 ppp_data.unallocated_weight);
266 seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
267 seq_printf(m, "capped=%d\n", ppp_data.capped);
268 seq_printf(m, "unallocated_capacity=%ld\n",
269 ppp_data.unallocated_entitlement);
270}
271
272/**
273 * parse_mpp_data
274 * Parse out data returned from h_get_mpp
275 */
276static void parse_mpp_data(struct seq_file *m)
277{
278 struct hvcall_mpp_data mpp_data;
279 int rc;
280
281 rc = h_get_mpp(&mpp_data);
282 if (rc)
283 return;
284
285 seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
286
287 if (mpp_data.mapped_mem != -1)
288 seq_printf(m, "mapped_entitled_memory=%ld\n",
289 mpp_data.mapped_mem);
290
291 seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
292 seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
293
294 seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
295 seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
296 mpp_data.unallocated_mem_weight);
297 seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
298 mpp_data.unallocated_entitlement);
299
300 if (mpp_data.pool_size != -1)
301 seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
302 mpp_data.pool_size);
303
304 seq_printf(m, "entitled_memory_loan_request=%ld\n",
305 mpp_data.loan_request);
306
307 seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
211} 308}
212 309
213#define SPLPAR_CHARACTERISTICS_TOKEN 20 310#define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
313 return count; 410 return count;
314} 411}
315 412
413static void pseries_cmo_data(struct seq_file *m)
414{
415 int cpu;
416 unsigned long cmo_faults = 0;
417 unsigned long cmo_fault_time = 0;
418
419 if (!firmware_has_feature(FW_FEATURE_CMO))
420 return;
421
422 for_each_possible_cpu(cpu) {
423 cmo_faults += lppaca[cpu].cmo_faults;
424 cmo_fault_time += lppaca[cpu].cmo_fault_time;
425 }
426
427 seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
428 seq_printf(m, "cmo_fault_time_usec=%lu\n",
429 cmo_fault_time / tb_ticks_per_usec);
430}
431
316static int pseries_lparcfg_data(struct seq_file *m, void *v) 432static int pseries_lparcfg_data(struct seq_file *m, void *v)
317{ 433{
318 int partition_potential_processors; 434 int partition_potential_processors;
@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
334 partition_active_processors = lparcfg_count_active_processors(); 450 partition_active_processors = lparcfg_count_active_processors();
335 451
336 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 452 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
337 unsigned long h_entitled, h_unallocated;
338 unsigned long h_aggregation, h_resource;
339 unsigned long pool_idle_time, pool_procs;
340 unsigned long purr;
341
342 h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
343 &h_resource);
344
345 seq_printf(m, "R4=0x%lx\n", h_entitled);
346 seq_printf(m, "R5=0x%lx\n", h_unallocated);
347 seq_printf(m, "R6=0x%lx\n", h_aggregation);
348 seq_printf(m, "R7=0x%lx\n", h_resource);
349
350 purr = get_purr();
351
352 /* this call handles the ibm,get-system-parameter contents */ 453 /* this call handles the ibm,get-system-parameter contents */
353 parse_system_parameter_string(m); 454 parse_system_parameter_string(m);
455 parse_ppp_data(m);
456 parse_mpp_data(m);
457 pseries_cmo_data(m);
354 458
355 seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); 459 seq_printf(m, "purr=%ld\n", get_purr());
356
357 seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
358
359 seq_printf(m, "system_active_processors=%ld\n",
360 (h_resource >> 0 * 8) & 0xffff);
361
362 /* pool related entries are apropriate for shared configs */
363 if (lppaca[0].shared_proc) {
364
365 h_pic(&pool_idle_time, &pool_procs);
366
367 seq_printf(m, "pool=%ld\n",
368 (h_aggregation >> 0 * 8) & 0xffff);
369
370 /* report pool_capacity in percentage */
371 seq_printf(m, "pool_capacity=%ld\n",
372 ((h_resource >> 2 * 8) & 0xffff) * 100);
373
374 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
375
376 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
377 }
378
379 seq_printf(m, "unallocated_capacity_weight=%ld\n",
380 (h_resource >> 4 * 8) & 0xFF);
381
382 seq_printf(m, "capacity_weight=%ld\n",
383 (h_resource >> 5 * 8) & 0xFF);
384
385 seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
386
387 seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
388
389 seq_printf(m, "purr=%ld\n", purr);
390
391 } else { /* non SPLPAR case */ 460 } else { /* non SPLPAR case */
392 461
393 seq_printf(m, "system_active_processors=%d\n", 462 seq_printf(m, "system_active_processors=%d\n",
@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
414 return 0; 483 return 0;
415} 484}
416 485
486static ssize_t update_ppp(u64 *entitlement, u8 *weight)
487{
488 struct hvcall_ppp_data ppp_data;
489 u8 new_weight;
490 u64 new_entitled;
491 ssize_t retval;
492
493 /* Get our current parameters */
494 retval = h_get_ppp(&ppp_data);
495 if (retval)
496 return retval;
497
498 if (entitlement) {
499 new_weight = ppp_data.weight;
500 new_entitled = *entitlement;
501 } else if (weight) {
502 new_weight = *weight;
503 new_entitled = ppp_data.entitlement;
504 } else
505 return -EINVAL;
506
507 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
508 __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
509
510 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
511 __FUNCTION__, new_entitled, new_weight);
512
513 retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
514 return retval;
515}
516
517/**
518 * update_mpp
519 *
520 * Update the memory entitlement and weight for the partition. Caller must
521 * specify either a new entitlement or weight, not both, to be updated
522 * since the h_set_mpp call takes both entitlement and weight as parameters.
523 */
524static ssize_t update_mpp(u64 *entitlement, u8 *weight)
525{
526 struct hvcall_mpp_data mpp_data;
527 u64 new_entitled;
528 u8 new_weight;
529 ssize_t rc;
530
531 if (entitlement) {
532 /* Check with vio to ensure the new memory entitlement
533 * can be handled.
534 */
535 rc = vio_cmo_entitlement_update(*entitlement);
536 if (rc)
537 return rc;
538 }
539
540 rc = h_get_mpp(&mpp_data);
541 if (rc)
542 return rc;
543
544 if (entitlement) {
545 new_weight = mpp_data.mem_weight;
546 new_entitled = *entitlement;
547 } else if (weight) {
548 new_weight = *weight;
549 new_entitled = mpp_data.entitled_mem;
550 } else
551 return -EINVAL;
552
553 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
554 __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
555
556 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
557 __FUNCTION__, new_entitled, new_weight);
558
559 rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
560 return rc;
561}
562
417/* 563/*
418 * Interface for changing system parameters (variable capacity weight 564 * Interface for changing system parameters (variable capacity weight
419 * and entitled capacity). Format of input is "param_name=value"; 565 * and entitled capacity). Format of input is "param_name=value";
@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
427static ssize_t lparcfg_write(struct file *file, const char __user * buf, 573static ssize_t lparcfg_write(struct file *file, const char __user * buf,
428 size_t count, loff_t * off) 574 size_t count, loff_t * off)
429{ 575{
430 char *kbuf; 576 int kbuf_sz = 64;
577 char kbuf[kbuf_sz];
431 char *tmp; 578 char *tmp;
432 u64 new_entitled, *new_entitled_ptr = &new_entitled; 579 u64 new_entitled, *new_entitled_ptr = &new_entitled;
433 u8 new_weight, *new_weight_ptr = &new_weight; 580 u8 new_weight, *new_weight_ptr = &new_weight;
434 581 ssize_t retval;
435 unsigned long current_entitled; /* parameters for h_get_ppp */
436 unsigned long dummy;
437 unsigned long resource;
438 u8 current_weight;
439
440 ssize_t retval = -ENOMEM;
441 582
442 if (!firmware_has_feature(FW_FEATURE_SPLPAR) || 583 if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
443 firmware_has_feature(FW_FEATURE_ISERIES)) 584 firmware_has_feature(FW_FEATURE_ISERIES))
444 return -EINVAL; 585 return -EINVAL;
445 586
446 kbuf = kmalloc(count, GFP_KERNEL); 587 if (count > kbuf_sz)
447 if (!kbuf) 588 return -EINVAL;
448 goto out;
449 589
450 retval = -EFAULT;
451 if (copy_from_user(kbuf, buf, count)) 590 if (copy_from_user(kbuf, buf, count))
452 goto out; 591 return -EFAULT;
453 592
454 retval = -EINVAL;
455 kbuf[count - 1] = '\0'; 593 kbuf[count - 1] = '\0';
456 tmp = strchr(kbuf, '='); 594 tmp = strchr(kbuf, '=');
457 if (!tmp) 595 if (!tmp)
458 goto out; 596 return -EINVAL;
459 597
460 *tmp++ = '\0'; 598 *tmp++ = '\0';
461 599
@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
463 char *endp; 601 char *endp;
464 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); 602 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
465 if (endp == tmp) 603 if (endp == tmp)
466 goto out; 604 return -EINVAL;
467 new_weight_ptr = &current_weight; 605
606 retval = update_ppp(new_entitled_ptr, NULL);
468 } else if (!strcmp(kbuf, "capacity_weight")) { 607 } else if (!strcmp(kbuf, "capacity_weight")) {
469 char *endp; 608 char *endp;
470 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); 609 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
471 if (endp == tmp) 610 if (endp == tmp)
472 goto out; 611 return -EINVAL;
473 new_entitled_ptr = &current_entitled;
474 } else
475 goto out;
476
477 /* Get our current parameters */
478 retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
479 if (retval) {
480 retval = -EIO;
481 goto out;
482 }
483
484 current_weight = (resource >> 5 * 8) & 0xFF;
485 612
486 pr_debug("%s: current_entitled = %lu, current_weight = %u\n", 613 retval = update_ppp(NULL, new_weight_ptr);
487 __func__, current_entitled, current_weight); 614 } else if (!strcmp(kbuf, "entitled_memory")) {
615 char *endp;
616 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
617 if (endp == tmp)
618 return -EINVAL;
488 619
489 pr_debug("%s: new_entitled = %lu, new_weight = %u\n", 620 retval = update_mpp(new_entitled_ptr, NULL);
490 __func__, *new_entitled_ptr, *new_weight_ptr); 621 } else if (!strcmp(kbuf, "entitled_memory_weight")) {
622 char *endp;
623 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
624 if (endp == tmp)
625 return -EINVAL;
491 626
492 retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, 627 retval = update_mpp(NULL, new_weight_ptr);
493 *new_weight_ptr); 628 } else
629 return -EINVAL;
494 630
495 if (retval == H_SUCCESS || retval == H_CONSTRAINED) { 631 if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
496 retval = count; 632 retval = count;
@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
506 retval = -EIO; 642 retval = -EIO;
507 } 643 }
508 644
509out:
510 kfree(kbuf);
511 return retval; 645 return retval;
512} 646}
513 647
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 219f3634115e..db2497ccc111 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,6 +47,8 @@
47#ifdef CONFIG_PPC64 47#ifdef CONFIG_PPC64
48#include <asm/firmware.h> 48#include <asm/firmware.h>
49#endif 49#endif
50#include <linux/kprobes.h>
51#include <linux/kdebug.h>
50 52
51extern unsigned long _get_SP(void); 53extern unsigned long _get_SP(void);
52 54
@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
239} 241}
240#endif /* CONFIG_SMP */ 242#endif /* CONFIG_SMP */
241 243
244void do_dabr(struct pt_regs *regs, unsigned long address,
245 unsigned long error_code)
246{
247 siginfo_t info;
248
249 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
250 11, SIGSEGV) == NOTIFY_STOP)
251 return;
252
253 if (debugger_dabr_match(regs))
254 return;
255
256 /* Clear the DAC and struct entries. One shot trigger */
257#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
258 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
259 | DBCR0_IDM));
260#endif
261
262 /* Clear the DABR */
263 set_dabr(0);
264
265 /* Deliver the signal to userspace */
266 info.si_signo = SIGTRAP;
267 info.si_errno = 0;
268 info.si_code = TRAP_HWBKPT;
269 info.si_addr = (void __user *)address;
270 force_sig_info(SIGTRAP, &info, current);
271}
272
242static DEFINE_PER_CPU(unsigned long, current_dabr); 273static DEFINE_PER_CPU(unsigned long, current_dabr);
243 274
244int set_dabr(unsigned long dabr) 275int set_dabr(unsigned long dabr)
@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
254#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) 285#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
255 mtspr(SPRN_DABR, dabr); 286 mtspr(SPRN_DABR, dabr);
256#endif 287#endif
288
289#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
290 mtspr(SPRN_DAC1, dabr);
291#endif
292
257 return 0; 293 return 0;
258} 294}
259 295
@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
337 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) 373 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
338 set_dabr(new->thread.dabr); 374 set_dabr(new->thread.dabr);
339 375
376#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
377 /* If new thread DAC (HW breakpoint) is the same then leave it */
378 if (new->thread.dabr)
379 set_dabr(new->thread.dabr);
380#endif
381
340 new_thread = &new->thread; 382 new_thread = &new->thread;
341 old_thread = &current->thread; 383 old_thread = &current->thread;
342 384
@@ -525,6 +567,10 @@ void flush_thread(void)
525 if (current->thread.dabr) { 567 if (current->thread.dabr) {
526 current->thread.dabr = 0; 568 current->thread.dabr = 0;
527 set_dabr(0); 569 set_dabr(0);
570
571#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
572 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
573#endif
528 } 574 }
529} 575}
530 576
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1ea8c8d3ce89..c4ab2195b9cb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
643#else 643#else
644#define OV5_MSI 0x00 644#define OV5_MSI 0x00
645#endif /* CONFIG_PCI_MSI */ 645#endif /* CONFIG_PCI_MSI */
646#ifdef CONFIG_PPC_SMLPAR
647#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
648#else
649#define OV5_CMO 0x00
650#endif
646 651
647/* 652/*
648 * The architecture vector has an array of PVR mask/value pairs, 653 * The architecture vector has an array of PVR mask/value pairs,
@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
687 0, /* don't halt */ 692 0, /* don't halt */
688 693
689 /* option vector 5: PAPR/OF options */ 694 /* option vector 5: PAPR/OF options */
690 3 - 2, /* length */ 695 5 - 2, /* length */
691 0, /* don't ignore, don't halt */ 696 0, /* don't ignore, don't halt */
692 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | 697 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
693 OV5_DONATE_DEDICATE_CPU | OV5_MSI, 698 OV5_DONATE_DEDICATE_CPU | OV5_MSI,
699 0,
700 OV5_CMO,
694}; 701};
695 702
696/* Old method - ELF header with PT_NOTE sections */ 703/* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8feb93e7890c..a5d0e78779c8 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
703 703
704 if (regs != NULL) { 704 if (regs != NULL) {
705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
706 task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; 706 task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
707 regs->msr |= MSR_DE; 707 regs->msr |= MSR_DE;
708#else 708#else
709 regs->msr |= MSR_SE; 709 regs->msr |= MSR_SE;
@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
716{ 716{
717 struct pt_regs *regs = task->thread.regs; 717 struct pt_regs *regs = task->thread.regs;
718 718
719
720#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
721 /* If DAC then do not single step, skip */
722 if (task->thread.dabr)
723 return;
724#endif
725
719 if (regs != NULL) { 726 if (regs != NULL) {
720#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 727#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
721 task->thread.dbcr0 = 0; 728 task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
722 regs->msr &= ~MSR_DE; 729 regs->msr &= ~MSR_DE;
723#else 730#else
724 regs->msr &= ~MSR_SE; 731 regs->msr &= ~MSR_SE;
@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
727 clear_tsk_thread_flag(task, TIF_SINGLESTEP); 734 clear_tsk_thread_flag(task, TIF_SINGLESTEP);
728} 735}
729 736
730static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, 737int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
731 unsigned long data) 738 unsigned long data)
732{ 739{
733 /* We only support one DABR and no IABRS at the moment */ 740 /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
741 * For embedded processors we support one DAC and no IAC's at the
742 * moment.
743 */
734 if (addr > 0) 744 if (addr > 0)
735 return -EINVAL; 745 return -EINVAL;
736 746
737 /* The bottom 3 bits are flags */
738 if ((data & ~0x7UL) >= TASK_SIZE) 747 if ((data & ~0x7UL) >= TASK_SIZE)
739 return -EIO; 748 return -EIO;
740 749
741 /* Ensure translation is on */ 750#ifdef CONFIG_PPC64
751
752 /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
753 * It was assumed, on previous implementations, that 3 bits were
754 * passed together with the data address, fitting the design of the
755 * DABR register, as follows:
756 *
757 * bit 0: Read flag
758 * bit 1: Write flag
759 * bit 2: Breakpoint translation
760 *
761 * Thus, we use them here as so.
762 */
763
764 /* Ensure breakpoint translation bit is set */
742 if (data && !(data & DABR_TRANSLATION)) 765 if (data && !(data & DABR_TRANSLATION))
743 return -EIO; 766 return -EIO;
744 767
768 /* Move contents to the DABR register */
745 task->thread.dabr = data; 769 task->thread.dabr = data;
770
771#endif
772#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
773
774 /* As described above, it was assumed 3 bits were passed with the data
775 * address, but we will assume only the mode bits will be passed
776 * as to not cause alignment restrictions for DAC-based processors.
777 */
778
779 /* DAC's hold the whole address without any mode flags */
780 task->thread.dabr = data & ~0x3UL;
781
782 if (task->thread.dabr == 0) {
783 task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
784 task->thread.regs->msr &= ~MSR_DE;
785 return 0;
786 }
787
788 /* Read or Write bits must be set */
789
790 if (!(data & 0x3UL))
791 return -EINVAL;
792
793 /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
794 register */
795 task->thread.dbcr0 = DBCR0_IDM;
796
797 /* Check for write and read flags and set DBCR0
798 accordingly */
799 if (data & 0x1UL)
800 task->thread.dbcr0 |= DBSR_DAC1R;
801 if (data & 0x2UL)
802 task->thread.dbcr0 |= DBSR_DAC1W;
803
804 task->thread.regs->msr |= MSR_DE;
805#endif
746 return 0; 806 return 0;
747} 807}
748 808
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index ad55488939c3..7aada783ec6a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
145 * user space. The DABR will have been cleared if it 145 * user space. The DABR will have been cleared if it
146 * triggered inside the kernel. 146 * triggered inside the kernel.
147 */ 147 */
148 if (current->thread.dabr) 148 if (current->thread.dabr) {
149 set_dabr(current->thread.dabr); 149 set_dabr(current->thread.dabr);
150#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
151 mtspr(SPRN_DBCR0, current->thread.dbcr0);
152#endif
153 }
150 154
151 if (is32) { 155 if (is32) {
152 if (ka.sa.sa_flags & SA_SIGINFO) 156 if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index aba0ba95f062..800e5e9a087b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -529,7 +529,8 @@ static void register_nodes(void)
529#endif 529#endif
530 530
531/* Only valid if CPU is present. */ 531/* Only valid if CPU is present. */
532static ssize_t show_physical_id(struct sys_device *dev, char *buf) 532static ssize_t show_physical_id(struct sys_device *dev,
533 struct sysdev_attribute *attr, char *buf)
533{ 534{
534 struct cpu *cpu = container_of(dev, struct cpu, sysdev); 535 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
535 536
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 878fbddb6ae1..81ccb8dd1a54 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
1067 } 1067 }
1068 1068
1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); 1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1070 } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1071 regs->msr &= ~MSR_DE;
1072
1073 if (user_mode(regs)) {
1074 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
1075 DBCR0_IDM);
1076 } else {
1077 /* Disable DAC interupts */
1078 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
1079 DBSR_DAC1W | DBCR0_IDM));
1080
1081 /* Clear the DAC event */
1082 mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
1083 }
1084 /* Setup and send the trap to the handler */
1085 do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
1070 } 1086 }
1071} 1087}
1072#endif /* CONFIG_4xx || CONFIG_BOOKE */ 1088#endif /* CONFIG_4xx || CONFIG_BOOKE */
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b77f8af7ddde..ade8aeaa2e70 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * IBM PowerPC Virtual I/O Infrastructure Support. 2 * IBM PowerPC Virtual I/O Infrastructure Support.
3 * 3 *
4 * Copyright (c) 2003-2005 IBM Corp. 4 * Copyright (c) 2003,2008 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com 5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com 6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com> 7 * Hollis Blanchard <hollisb@us.ibm.com>
8 * Stephen Rothwell 8 * Stephen Rothwell
9 * Robert Jennings <rcjenn@us.ibm.com>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */
46 .dev.bus = &vio_bus_type, 47 .dev.bus = &vio_bus_type,
47}; 48};
48 49
50#ifdef CONFIG_PPC_SMLPAR
51/**
52 * vio_cmo_pool - A pool of IO memory for CMO use
53 *
54 * @size: The size of the pool in bytes
55 * @free: The amount of free memory in the pool
56 */
57struct vio_cmo_pool {
58 size_t size;
59 size_t free;
60};
61
62/* How many ms to delay queued balance work */
63#define VIO_CMO_BALANCE_DELAY 100
64
65/* Portion out IO memory to CMO devices by this chunk size */
66#define VIO_CMO_BALANCE_CHUNK 131072
67
68/**
69 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
70 *
71 * @vio_dev: struct vio_dev pointer
72 * @list: pointer to other devices on bus that are being tracked
73 */
74struct vio_cmo_dev_entry {
75 struct vio_dev *viodev;
76 struct list_head list;
77};
78
79/**
80 * vio_cmo - VIO bus accounting structure for CMO entitlement
81 *
82 * @lock: spinlock for entire structure
83 * @balance_q: work queue for balancing system entitlement
84 * @device_list: list of CMO-enabled devices requiring entitlement
85 * @entitled: total system entitlement in bytes
86 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
87 * @excess: pool of excess entitlement not needed for device reserves or spare
88 * @spare: IO memory for device hotplug functionality
89 * @min: minimum necessary for system operation
90 * @desired: desired memory for system operation
91 * @curr: bytes currently allocated
92 * @high: high water mark for IO data usage
93 */
94struct vio_cmo {
95 spinlock_t lock;
96 struct delayed_work balance_q;
97 struct list_head device_list;
98 size_t entitled;
99 struct vio_cmo_pool reserve;
100 struct vio_cmo_pool excess;
101 size_t spare;
102 size_t min;
103 size_t desired;
104 size_t curr;
105 size_t high;
106} vio_cmo;
107
108/**
109 * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
110 */
111static int vio_cmo_num_OF_devs(void)
112{
113 struct device_node *node_vroot;
114 int count = 0;
115
116 /*
117 * Count the number of vdevice entries with an
118 * ibm,my-dma-window OF property
119 */
120 node_vroot = of_find_node_by_name(NULL, "vdevice");
121 if (node_vroot) {
122 struct device_node *of_node;
123 struct property *prop;
124
125 for_each_child_of_node(node_vroot, of_node) {
126 prop = of_find_property(of_node, "ibm,my-dma-window",
127 NULL);
128 if (prop)
129 count++;
130 }
131 }
132 of_node_put(node_vroot);
133 return count;
134}
135
136/**
137 * vio_cmo_alloc - allocate IO memory for CMO-enable devices
138 *
139 * @viodev: VIO device requesting IO memory
140 * @size: size of allocation requested
141 *
142 * Allocations come from memory reserved for the devices and any excess
143 * IO memory available to all devices. The spare pool used to service
144 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
145 * made available.
146 *
147 * Return codes:
148 * 0 for successful allocation and -ENOMEM for a failure
149 */
150static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
151{
152 unsigned long flags;
153 size_t reserve_free = 0;
154 size_t excess_free = 0;
155 int ret = -ENOMEM;
156
157 spin_lock_irqsave(&vio_cmo.lock, flags);
158
159 /* Determine the amount of free entitlement available in reserve */
160 if (viodev->cmo.entitled > viodev->cmo.allocated)
161 reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
162
163 /* If spare is not fulfilled, the excess pool can not be used. */
164 if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
165 excess_free = vio_cmo.excess.free;
166
167 /* The request can be satisfied */
168 if ((reserve_free + excess_free) >= size) {
169 vio_cmo.curr += size;
170 if (vio_cmo.curr > vio_cmo.high)
171 vio_cmo.high = vio_cmo.curr;
172 viodev->cmo.allocated += size;
173 size -= min(reserve_free, size);
174 vio_cmo.excess.free -= size;
175 ret = 0;
176 }
177
178 spin_unlock_irqrestore(&vio_cmo.lock, flags);
179 return ret;
180}
181
182/**
183 * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
184 * @viodev: VIO device freeing IO memory
185 * @size: size of deallocation
186 *
187 * IO memory is freed by the device back to the correct memory pools.
188 * The spare pool is replenished first from either memory pool, then
189 * the reserve pool is used to reduce device entitlement, the excess
190 * pool is used to increase the reserve pool toward the desired entitlement
191 * target, and then the remaining memory is returned to the pools.
192 *
193 */
194static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
195{
196 unsigned long flags;
197 size_t spare_needed = 0;
198 size_t excess_freed = 0;
199 size_t reserve_freed = size;
200 size_t tmp;
201 int balance = 0;
202
203 spin_lock_irqsave(&vio_cmo.lock, flags);
204 vio_cmo.curr -= size;
205
206 /* Amount of memory freed from the excess pool */
207 if (viodev->cmo.allocated > viodev->cmo.entitled) {
208 excess_freed = min(reserve_freed, (viodev->cmo.allocated -
209 viodev->cmo.entitled));
210 reserve_freed -= excess_freed;
211 }
212
213 /* Remove allocation from device */
214 viodev->cmo.allocated -= (reserve_freed + excess_freed);
215
216 /* Spare is a subset of the reserve pool, replenish it first. */
217 spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
218
219 /*
220 * Replenish the spare in the reserve pool from the excess pool.
221 * This moves entitlement into the reserve pool.
222 */
223 if (spare_needed && excess_freed) {
224 tmp = min(excess_freed, spare_needed);
225 vio_cmo.excess.size -= tmp;
226 vio_cmo.reserve.size += tmp;
227 vio_cmo.spare += tmp;
228 excess_freed -= tmp;
229 spare_needed -= tmp;
230 balance = 1;
231 }
232
233 /*
234 * Replenish the spare in the reserve pool from the reserve pool.
235 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
236 * if needed, and gives it to the spare pool. The amount of used
237 * memory in this pool does not change.
238 */
239 if (spare_needed && reserve_freed) {
240 tmp = min(spare_needed, min(reserve_freed,
241 (viodev->cmo.entitled -
242 VIO_CMO_MIN_ENT)));
243
244 vio_cmo.spare += tmp;
245 viodev->cmo.entitled -= tmp;
246 reserve_freed -= tmp;
247 spare_needed -= tmp;
248 balance = 1;
249 }
250
251 /*
252 * Increase the reserve pool until the desired allocation is met.
253 * Move an allocation freed from the excess pool into the reserve
254 * pool and schedule a balance operation.
255 */
256 if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
257 tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
258
259 vio_cmo.excess.size -= tmp;
260 vio_cmo.reserve.size += tmp;
261 excess_freed -= tmp;
262 balance = 1;
263 }
264
265 /* Return memory from the excess pool to that pool */
266 if (excess_freed)
267 vio_cmo.excess.free += excess_freed;
268
269 if (balance)
270 schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
271 spin_unlock_irqrestore(&vio_cmo.lock, flags);
272}
273
274/**
275 * vio_cmo_entitlement_update - Manage system entitlement changes
276 *
277 * @new_entitlement: new system entitlement to attempt to accommodate
278 *
279 * Increases in entitlement will be used to fulfill the spare entitlement
280 * and the rest is given to the excess pool. Decreases, if they are
281 * possible, come from the excess pool and from unused device entitlement
282 *
283 * Returns: 0 on success, -ENOMEM when change can not be made
284 */
285int vio_cmo_entitlement_update(size_t new_entitlement)
286{
287 struct vio_dev *viodev;
288 struct vio_cmo_dev_entry *dev_ent;
289 unsigned long flags;
290 size_t avail, delta, tmp;
291
292 spin_lock_irqsave(&vio_cmo.lock, flags);
293
294 /* Entitlement increases */
295 if (new_entitlement > vio_cmo.entitled) {
296 delta = new_entitlement - vio_cmo.entitled;
297
298 /* Fulfill spare allocation */
299 if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
300 tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
301 vio_cmo.spare += tmp;
302 vio_cmo.reserve.size += tmp;
303 delta -= tmp;
304 }
305
306 /* Remaining new allocation goes to the excess pool */
307 vio_cmo.entitled += delta;
308 vio_cmo.excess.size += delta;
309 vio_cmo.excess.free += delta;
310
311 goto out;
312 }
313
314 /* Entitlement decreases */
315 delta = vio_cmo.entitled - new_entitlement;
316 avail = vio_cmo.excess.free;
317
318 /*
319 * Need to check how much unused entitlement each device can
320 * sacrifice to fulfill entitlement change.
321 */
322 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
323 if (avail >= delta)
324 break;
325
326 viodev = dev_ent->viodev;
327 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
328 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
329 avail += viodev->cmo.entitled -
330 max_t(size_t, viodev->cmo.allocated,
331 VIO_CMO_MIN_ENT);
332 }
333
334 if (delta <= avail) {
335 vio_cmo.entitled -= delta;
336
337 /* Take entitlement from the excess pool first */
338 tmp = min(vio_cmo.excess.free, delta);
339 vio_cmo.excess.size -= tmp;
340 vio_cmo.excess.free -= tmp;
341 delta -= tmp;
342
343 /*
344 * Remove all but VIO_CMO_MIN_ENT bytes from devices
345 * until entitlement change is served
346 */
347 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
348 if (!delta)
349 break;
350
351 viodev = dev_ent->viodev;
352 tmp = 0;
353 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
354 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
355 tmp = viodev->cmo.entitled -
356 max_t(size_t, viodev->cmo.allocated,
357 VIO_CMO_MIN_ENT);
358 viodev->cmo.entitled -= min(tmp, delta);
359 delta -= min(tmp, delta);
360 }
361 } else {
362 spin_unlock_irqrestore(&vio_cmo.lock, flags);
363 return -ENOMEM;
364 }
365
366out:
367 schedule_delayed_work(&vio_cmo.balance_q, 0);
368 spin_unlock_irqrestore(&vio_cmo.lock, flags);
369 return 0;
370}
371
372/**
373 * vio_cmo_balance - Balance entitlement among devices
374 *
375 * @work: work queue structure for this operation
376 *
377 * Any system entitlement above the minimum needed for devices, or
378 * already allocated to devices, can be distributed to the devices.
379 * The list of devices is iterated through to recalculate the desired
380 * entitlement level and to determine how much entitlement above the
381 * minimum entitlement is allocated to devices.
382 *
383 * Small chunks of the available entitlement are given to devices until
384 * their requirements are fulfilled or there is no entitlement left to give.
385 * Upon completion sizes of the reserve and excess pools are calculated.
386 *
387 * The system minimum entitlement level is also recalculated here.
388 * Entitlement will be reserved for devices even after vio_bus_remove to
389 * accommodate reloading the driver. The OF tree is walked to count the
390 * number of devices present and this will remove entitlement for devices
391 * that have actually left the system after having vio_bus_remove called.
392 */
393static void vio_cmo_balance(struct work_struct *work)
394{
395 struct vio_cmo *cmo;
396 struct vio_dev *viodev;
397 struct vio_cmo_dev_entry *dev_ent;
398 unsigned long flags;
399 size_t avail = 0, level, chunk, need;
400 int devcount = 0, fulfilled;
401
402 cmo = container_of(work, struct vio_cmo, balance_q.work);
403
404 spin_lock_irqsave(&vio_cmo.lock, flags);
405
406 /* Calculate minimum entitlement and fulfill spare */
407 cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
408 BUG_ON(cmo->min > cmo->entitled);
409 cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
410 cmo->min += cmo->spare;
411 cmo->desired = cmo->min;
412
413 /*
414 * Determine how much entitlement is available and reset device
415 * entitlements
416 */
417 avail = cmo->entitled - cmo->spare;
418 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
419 viodev = dev_ent->viodev;
420 devcount++;
421 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
422 cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
423 avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
424 }
425
426 /*
427 * Having provided each device with the minimum entitlement, loop
428 * over the devices portioning out the remaining entitlement
429 * until there is nothing left.
430 */
431 level = VIO_CMO_MIN_ENT;
432 while (avail) {
433 fulfilled = 0;
434 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
435 viodev = dev_ent->viodev;
436
437 if (viodev->cmo.desired <= level) {
438 fulfilled++;
439 continue;
440 }
441
442 /*
443 * Give the device up to VIO_CMO_BALANCE_CHUNK
444 * bytes of entitlement, but do not exceed the
445 * desired level of entitlement for the device.
446 */
447 chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
448 chunk = min(chunk, (viodev->cmo.desired -
449 viodev->cmo.entitled));
450 viodev->cmo.entitled += chunk;
451
452 /*
453 * If the memory for this entitlement increase was
454 * already allocated to the device it does not come
455 * from the available pool being portioned out.
456 */
457 need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
458 max(viodev->cmo.allocated, level);
459 avail -= need;
460
461 }
462 if (fulfilled == devcount)
463 break;
464 level += VIO_CMO_BALANCE_CHUNK;
465 }
466
467 /* Calculate new reserve and excess pool sizes */
468 cmo->reserve.size = cmo->min;
469 cmo->excess.free = 0;
470 cmo->excess.size = 0;
471 need = 0;
472 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
473 viodev = dev_ent->viodev;
474 /* Calculated reserve size above the minimum entitlement */
475 if (viodev->cmo.entitled)
476 cmo->reserve.size += (viodev->cmo.entitled -
477 VIO_CMO_MIN_ENT);
478 /* Calculated used excess entitlement */
479 if (viodev->cmo.allocated > viodev->cmo.entitled)
480 need += viodev->cmo.allocated - viodev->cmo.entitled;
481 }
482 cmo->excess.size = cmo->entitled - cmo->reserve.size;
483 cmo->excess.free = cmo->excess.size - need;
484
485 cancel_delayed_work(container_of(work, struct delayed_work, work));
486 spin_unlock_irqrestore(&vio_cmo.lock, flags);
487}
488
489static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
490 dma_addr_t *dma_handle, gfp_t flag)
491{
492 struct vio_dev *viodev = to_vio_dev(dev);
493 void *ret;
494
495 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
496 atomic_inc(&viodev->cmo.allocs_failed);
497 return NULL;
498 }
499
500 ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
501 if (unlikely(ret == NULL)) {
502 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
503 atomic_inc(&viodev->cmo.allocs_failed);
504 }
505
506 return ret;
507}
508
509static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
510 void *vaddr, dma_addr_t dma_handle)
511{
512 struct vio_dev *viodev = to_vio_dev(dev);
513
514 dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
515
516 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
517}
518
519static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
520 size_t size,
521 enum dma_data_direction direction,
522 struct dma_attrs *attrs)
523{
524 struct vio_dev *viodev = to_vio_dev(dev);
525 dma_addr_t ret = DMA_ERROR_CODE;
526
527 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
528 atomic_inc(&viodev->cmo.allocs_failed);
529 return ret;
530 }
531
532 ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
533 if (unlikely(dma_mapping_error(ret))) {
534 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
535 atomic_inc(&viodev->cmo.allocs_failed);
536 }
537
538 return ret;
539}
540
541static void vio_dma_iommu_unmap_single(struct device *dev,
542 dma_addr_t dma_handle, size_t size,
543 enum dma_data_direction direction,
544 struct dma_attrs *attrs)
545{
546 struct vio_dev *viodev = to_vio_dev(dev);
547
548 dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
549
550 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
551}
552
553static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
554 int nelems, enum dma_data_direction direction,
555 struct dma_attrs *attrs)
556{
557 struct vio_dev *viodev = to_vio_dev(dev);
558 struct scatterlist *sgl;
559 int ret, count = 0;
560 size_t alloc_size = 0;
561
562 for (sgl = sglist; count < nelems; count++, sgl++)
563 alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
564
565 if (vio_cmo_alloc(viodev, alloc_size)) {
566 atomic_inc(&viodev->cmo.allocs_failed);
567 return 0;
568 }
569
570 ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
571
572 if (unlikely(!ret)) {
573 vio_cmo_dealloc(viodev, alloc_size);
574 atomic_inc(&viodev->cmo.allocs_failed);
575 }
576
577 for (sgl = sglist, count = 0; count < ret; count++, sgl++)
578 alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
579 if (alloc_size)
580 vio_cmo_dealloc(viodev, alloc_size);
581
582 return ret;
583}
584
585static void vio_dma_iommu_unmap_sg(struct device *dev,
586 struct scatterlist *sglist, int nelems,
587 enum dma_data_direction direction,
588 struct dma_attrs *attrs)
589{
590 struct vio_dev *viodev = to_vio_dev(dev);
591 struct scatterlist *sgl;
592 size_t alloc_size = 0;
593 int count = 0;
594
595 for (sgl = sglist; count < nelems; count++, sgl++)
596 alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
597
598 dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
599
600 vio_cmo_dealloc(viodev, alloc_size);
601}
602
603struct dma_mapping_ops vio_dma_mapping_ops = {
604 .alloc_coherent = vio_dma_iommu_alloc_coherent,
605 .free_coherent = vio_dma_iommu_free_coherent,
606 .map_single = vio_dma_iommu_map_single,
607 .unmap_single = vio_dma_iommu_unmap_single,
608 .map_sg = vio_dma_iommu_map_sg,
609 .unmap_sg = vio_dma_iommu_unmap_sg,
610};
611
612/**
613 * vio_cmo_set_dev_desired - Set desired entitlement for a device
614 *
615 * @viodev: struct vio_dev for device to alter
616 * @new_desired: new desired entitlement level in bytes
617 *
618 * For use by devices to request a change to their entitlement at runtime or
619 * through sysfs. The desired entitlement level is changed and a balancing
620 * of system resources is scheduled to run in the future.
621 */
622void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
623{
624 unsigned long flags;
625 struct vio_cmo_dev_entry *dev_ent;
626 int found = 0;
627
628 if (!firmware_has_feature(FW_FEATURE_CMO))
629 return;
630
631 spin_lock_irqsave(&vio_cmo.lock, flags);
632 if (desired < VIO_CMO_MIN_ENT)
633 desired = VIO_CMO_MIN_ENT;
634
635 /*
636 * Changes will not be made for devices not in the device list.
637 * If it is not in the device list, then no driver is loaded
638 * for the device and it can not receive entitlement.
639 */
640 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
641 if (viodev == dev_ent->viodev) {
642 found = 1;
643 break;
644 }
645 if (!found)
646 return;
647
648 /* Increase/decrease in desired device entitlement */
649 if (desired >= viodev->cmo.desired) {
650 /* Just bump the bus and device values prior to a balance*/
651 vio_cmo.desired += desired - viodev->cmo.desired;
652 viodev->cmo.desired = desired;
653 } else {
654 /* Decrease bus and device values for desired entitlement */
655 vio_cmo.desired -= viodev->cmo.desired - desired;
656 viodev->cmo.desired = desired;
657 /*
658 * If less entitlement is desired than current entitlement, move
659 * any reserve memory in the change region to the excess pool.
660 */
661 if (viodev->cmo.entitled > desired) {
662 vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
663 vio_cmo.excess.size += viodev->cmo.entitled - desired;
664 /*
665 * If entitlement moving from the reserve pool to the
666 * excess pool is currently unused, add to the excess
667 * free counter.
668 */
669 if (viodev->cmo.allocated < viodev->cmo.entitled)
670 vio_cmo.excess.free += viodev->cmo.entitled -
671 max(viodev->cmo.allocated, desired);
672 viodev->cmo.entitled = desired;
673 }
674 }
675 schedule_delayed_work(&vio_cmo.balance_q, 0);
676 spin_unlock_irqrestore(&vio_cmo.lock, flags);
677}
678
679/**
680 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
681 *
682 * @viodev - Pointer to struct vio_dev for device
683 *
684 * Determine the devices IO memory entitlement needs, attempting
685 * to satisfy the system minimum entitlement at first and scheduling
686 * a balance operation to take care of the rest at a later time.
687 *
688 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
689 * -ENOMEM when entitlement is not available for device or
690 * device entry.
691 *
692 */
693static int vio_cmo_bus_probe(struct vio_dev *viodev)
694{
695 struct vio_cmo_dev_entry *dev_ent;
696 struct device *dev = &viodev->dev;
697 struct vio_driver *viodrv = to_vio_driver(dev->driver);
698 unsigned long flags;
699 size_t size;
700
701 /*
702 * Check to see that device has a DMA window and configure
703 * entitlement for the device.
704 */
705 if (of_get_property(viodev->dev.archdata.of_node,
706 "ibm,my-dma-window", NULL)) {
707 /* Check that the driver is CMO enabled and get desired DMA */
708 if (!viodrv->get_desired_dma) {
709 dev_err(dev, "%s: device driver does not support CMO\n",
710 __func__);
711 return -EINVAL;
712 }
713
714 viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
715 if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
716 viodev->cmo.desired = VIO_CMO_MIN_ENT;
717 size = VIO_CMO_MIN_ENT;
718
719 dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
720 GFP_KERNEL);
721 if (!dev_ent)
722 return -ENOMEM;
723
724 dev_ent->viodev = viodev;
725 spin_lock_irqsave(&vio_cmo.lock, flags);
726 list_add(&dev_ent->list, &vio_cmo.device_list);
727 } else {
728 viodev->cmo.desired = 0;
729 size = 0;
730 spin_lock_irqsave(&vio_cmo.lock, flags);
731 }
732
733 /*
734 * If the needs for vio_cmo.min have not changed since they
735 * were last set, the number of devices in the OF tree has
736 * been constant and the IO memory for this is already in
737 * the reserve pool.
738 */
739 if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
740 VIO_CMO_MIN_ENT)) {
741 /* Updated desired entitlement if device requires it */
742 if (size)
743 vio_cmo.desired += (viodev->cmo.desired -
744 VIO_CMO_MIN_ENT);
745 } else {
746 size_t tmp;
747
748 tmp = vio_cmo.spare + vio_cmo.excess.free;
749 if (tmp < size) {
750 dev_err(dev, "%s: insufficient free "
751 "entitlement to add device. "
752 "Need %lu, have %lu\n", __func__,
753 size, (vio_cmo.spare + tmp));
754 spin_unlock_irqrestore(&vio_cmo.lock, flags);
755 return -ENOMEM;
756 }
757
758 /* Use excess pool first to fulfill request */
759 tmp = min(size, vio_cmo.excess.free);
760 vio_cmo.excess.free -= tmp;
761 vio_cmo.excess.size -= tmp;
762 vio_cmo.reserve.size += tmp;
763
764 /* Use spare if excess pool was insufficient */
765 vio_cmo.spare -= size - tmp;
766
767 /* Update bus accounting */
768 vio_cmo.min += size;
769 vio_cmo.desired += viodev->cmo.desired;
770 }
771 spin_unlock_irqrestore(&vio_cmo.lock, flags);
772 return 0;
773}
774
775/**
776 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
777 *
778 * @viodev - Pointer to struct vio_dev for device
779 *
780 * Remove the device from the cmo device list. The minimum entitlement
781 * will be reserved for the device as long as it is in the system. The
782 * rest of the entitlement the device had been allocated will be returned
783 * to the system.
784 */
785static void vio_cmo_bus_remove(struct vio_dev *viodev)
786{
787 struct vio_cmo_dev_entry *dev_ent;
788 unsigned long flags;
789 size_t tmp;
790
791 spin_lock_irqsave(&vio_cmo.lock, flags);
792 if (viodev->cmo.allocated) {
793 dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
794 "allocated after remove operation.\n",
795 __func__, viodev->cmo.allocated);
796 BUG();
797 }
798
799 /*
800 * Remove the device from the device list being maintained for
801 * CMO enabled devices.
802 */
803 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
804 if (viodev == dev_ent->viodev) {
805 list_del(&dev_ent->list);
806 kfree(dev_ent);
807 break;
808 }
809
810 /*
811 * Devices may not require any entitlement and they do not need
812 * to be processed. Otherwise, return the device's entitlement
813 * back to the pools.
814 */
815 if (viodev->cmo.entitled) {
816 /*
817 * This device has not yet left the OF tree, it's
818 * minimum entitlement remains in vio_cmo.min and
819 * vio_cmo.desired
820 */
821 vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
822
823 /*
824 * Save min allocation for device in reserve as long
825 * as it exists in OF tree as determined by later
826 * balance operation
827 */
828 viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
829
830 /* Replenish spare from freed reserve pool */
831 if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
832 tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
833 vio_cmo.spare));
834 vio_cmo.spare += tmp;
835 viodev->cmo.entitled -= tmp;
836 }
837
838 /* Remaining reserve goes to excess pool */
839 vio_cmo.excess.size += viodev->cmo.entitled;
840 vio_cmo.excess.free += viodev->cmo.entitled;
841 vio_cmo.reserve.size -= viodev->cmo.entitled;
842
843 /*
844 * Until the device is removed it will keep a
845 * minimum entitlement; this will guarantee that
846 * a module unload/load will result in a success.
847 */
848 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
849 viodev->cmo.desired = VIO_CMO_MIN_ENT;
850 atomic_set(&viodev->cmo.allocs_failed, 0);
851 }
852
853 spin_unlock_irqrestore(&vio_cmo.lock, flags);
854}
855
856static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
857{
858 vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
859 viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
860}
861
862/**
863 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
864 *
865 * Set up the reserve and excess entitlement pools based on available
866 * system entitlement and the number of devices in the OF tree that
867 * require entitlement in the reserve pool.
868 */
869static void vio_cmo_bus_init(void)
870{
871 struct hvcall_mpp_data mpp_data;
872 int err;
873
874 memset(&vio_cmo, 0, sizeof(struct vio_cmo));
875 spin_lock_init(&vio_cmo.lock);
876 INIT_LIST_HEAD(&vio_cmo.device_list);
877 INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
878
879 /* Get current system entitlement */
880 err = h_get_mpp(&mpp_data);
881
882 /*
883 * On failure, continue with entitlement set to 0, will panic()
884 * later when spare is reserved.
885 */
886 if (err != H_SUCCESS) {
887 printk(KERN_ERR "%s: unable to determine system IO "\
888 "entitlement. (%d)\n", __func__, err);
889 vio_cmo.entitled = 0;
890 } else {
891 vio_cmo.entitled = mpp_data.entitled_mem;
892 }
893
894 /* Set reservation and check against entitlement */
895 vio_cmo.spare = VIO_CMO_MIN_ENT;
896 vio_cmo.reserve.size = vio_cmo.spare;
897 vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
898 VIO_CMO_MIN_ENT);
899 if (vio_cmo.reserve.size > vio_cmo.entitled) {
900 printk(KERN_ERR "%s: insufficient system entitlement\n",
901 __func__);
902 panic("%s: Insufficient system entitlement", __func__);
903 }
904
905 /* Set the remaining accounting variables */
906 vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
907 vio_cmo.excess.free = vio_cmo.excess.size;
908 vio_cmo.min = vio_cmo.reserve.size;
909 vio_cmo.desired = vio_cmo.reserve.size;
910}
911
912/* sysfs device functions and data structures for CMO */
913
914#define viodev_cmo_rd_attr(name) \
915static ssize_t viodev_cmo_##name##_show(struct device *dev, \
916 struct device_attribute *attr, \
917 char *buf) \
918{ \
919 return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
920}
921
922static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
923 struct device_attribute *attr, char *buf)
924{
925 struct vio_dev *viodev = to_vio_dev(dev);
926 return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
927}
928
929static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
930 struct device_attribute *attr, const char *buf, size_t count)
931{
932 struct vio_dev *viodev = to_vio_dev(dev);
933 atomic_set(&viodev->cmo.allocs_failed, 0);
934 return count;
935}
936
937static ssize_t viodev_cmo_desired_set(struct device *dev,
938 struct device_attribute *attr, const char *buf, size_t count)
939{
940 struct vio_dev *viodev = to_vio_dev(dev);
941 size_t new_desired;
942 int ret;
943
944 ret = strict_strtoul(buf, 10, &new_desired);
945 if (ret)
946 return ret;
947
948 vio_cmo_set_dev_desired(viodev, new_desired);
949 return count;
950}
951
952viodev_cmo_rd_attr(desired);
953viodev_cmo_rd_attr(entitled);
954viodev_cmo_rd_attr(allocated);
955
956static ssize_t name_show(struct device *, struct device_attribute *, char *);
957static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
958static struct device_attribute vio_cmo_dev_attrs[] = {
959 __ATTR_RO(name),
960 __ATTR_RO(devspec),
961 __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
962 viodev_cmo_desired_show, viodev_cmo_desired_set),
963 __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
964 __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
965 __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
966 viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
967 __ATTR_NULL
968};
969
970/* sysfs bus functions and data structures for CMO */
971
972#define viobus_cmo_rd_attr(name) \
973static ssize_t \
974viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
975{ \
976 return sprintf(buf, "%lu\n", vio_cmo.name); \
977}
978
979#define viobus_cmo_pool_rd_attr(name, var) \
980static ssize_t \
981viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
982{ \
983 return sprintf(buf, "%lu\n", vio_cmo.name.var); \
984}
985
986static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
987 size_t count)
988{
989 unsigned long flags;
990
991 spin_lock_irqsave(&vio_cmo.lock, flags);
992 vio_cmo.high = vio_cmo.curr;
993 spin_unlock_irqrestore(&vio_cmo.lock, flags);
994
995 return count;
996}
997
998viobus_cmo_rd_attr(entitled);
999viobus_cmo_pool_rd_attr(reserve, size);
1000viobus_cmo_pool_rd_attr(excess, size);
1001viobus_cmo_pool_rd_attr(excess, free);
1002viobus_cmo_rd_attr(spare);
1003viobus_cmo_rd_attr(min);
1004viobus_cmo_rd_attr(desired);
1005viobus_cmo_rd_attr(curr);
1006viobus_cmo_rd_attr(high);
1007
1008static struct bus_attribute vio_cmo_bus_attrs[] = {
1009 __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
1010 __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
1011 __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
1012 __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
1013 __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
1014 __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
1015 __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
1016 __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
1017 __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
1018 viobus_cmo_high_show, viobus_cmo_high_reset),
1019 __ATTR_NULL
1020};
1021
1022static void vio_cmo_sysfs_init(void)
1023{
1024 vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
1025 vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
1026}
1027#else /* CONFIG_PPC_SMLPAR */
1028/* Dummy functions for iSeries platform */
1029int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1030void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1031static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1032static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1033static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1034static void vio_cmo_bus_init() {}
1035static void vio_cmo_sysfs_init() { }
1036#endif /* CONFIG_PPC_SMLPAR */
1037EXPORT_SYMBOL(vio_cmo_entitlement_update);
1038EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1039
49static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) 1040static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
50{ 1041{
51 const unsigned char *dma_window; 1042 const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
114 return error; 1105 return error;
115 1106
116 id = vio_match_device(viodrv->id_table, viodev); 1107 id = vio_match_device(viodrv->id_table, viodev);
117 if (id) 1108 if (id) {
1109 memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1110 if (firmware_has_feature(FW_FEATURE_CMO)) {
1111 error = vio_cmo_bus_probe(viodev);
1112 if (error)
1113 return error;
1114 }
118 error = viodrv->probe(viodev, id); 1115 error = viodrv->probe(viodev, id);
1116 if (error)
1117 vio_cmo_bus_remove(viodev);
1118 }
119 1119
120 return error; 1120 return error;
121} 1121}
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
125{ 1125{
126 struct vio_dev *viodev = to_vio_dev(dev); 1126 struct vio_dev *viodev = to_vio_dev(dev);
127 struct vio_driver *viodrv = to_vio_driver(dev->driver); 1127 struct vio_driver *viodrv = to_vio_driver(dev->driver);
1128 struct device *devptr;
1129 int ret = 1;
1130
1131 /*
1132 * Hold a reference to the device after the remove function is called
1133 * to allow for CMO accounting cleanup for the device.
1134 */
1135 devptr = get_device(dev);
128 1136
129 if (viodrv->remove) 1137 if (viodrv->remove)
130 return viodrv->remove(viodev); 1138 ret = viodrv->remove(viodev);
1139
1140 if (!ret && firmware_has_feature(FW_FEATURE_CMO))
1141 vio_cmo_bus_remove(viodev);
131 1142
132 /* driver can't remove */ 1143 put_device(devptr);
133 return 1; 1144 return ret;
134} 1145}
135 1146
136/** 1147/**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
215 viodev->unit_address = *unit_address; 1226 viodev->unit_address = *unit_address;
216 } 1227 }
217 viodev->dev.archdata.of_node = of_node_get(of_node); 1228 viodev->dev.archdata.of_node = of_node_get(of_node);
218 viodev->dev.archdata.dma_ops = &dma_iommu_ops; 1229
1230 if (firmware_has_feature(FW_FEATURE_CMO))
1231 vio_cmo_set_dma_ops(viodev);
1232 else
1233 viodev->dev.archdata.dma_ops = &dma_iommu_ops;
219 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); 1234 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
220 viodev->dev.archdata.numa_node = of_node_to_nid(of_node); 1235 viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
221 1236
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
245 int err; 1260 int err;
246 struct device_node *node_vroot; 1261 struct device_node *node_vroot;
247 1262
1263 if (firmware_has_feature(FW_FEATURE_CMO))
1264 vio_cmo_sysfs_init();
1265
248 err = bus_register(&vio_bus_type); 1266 err = bus_register(&vio_bus_type);
249 if (err) { 1267 if (err) {
250 printk(KERN_ERR "failed to register VIO bus\n"); 1268 printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
262 return err; 1280 return err;
263 } 1281 }
264 1282
1283 if (firmware_has_feature(FW_FEATURE_CMO))
1284 vio_cmo_bus_init();
1285
265 node_vroot = of_find_node_by_name(NULL, "vdevice"); 1286 node_vroot = of_find_node_by_name(NULL, "vdevice");
266 if (node_vroot) { 1287 if (node_vroot) {
267 struct device_node *of_node; 1288 struct device_node *of_node;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a914411bced5..4a8ce62fe112 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
85 85
86 /* The dummy segment contents for the bug workaround mentioned above 86 /* The dummy segment contents for the bug workaround mentioned above
87 near PHDRS. */ 87 near PHDRS. */
88 .dummy : { 88 .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
89 LONG(0xf177) 89 LONG(0xf177)
90 } :kernel :dummy 90 } :kernel :dummy
91 91
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331fc..565b7a237c84 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
100 return 0; 100 return 0;
101} 101}
102 102
103#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
104static void do_dabr(struct pt_regs *regs, unsigned long address,
105 unsigned long error_code)
106{
107 siginfo_t info;
108
109 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
110 11, SIGSEGV) == NOTIFY_STOP)
111 return;
112
113 if (debugger_dabr_match(regs))
114 return;
115
116 /* Clear the DABR */
117 set_dabr(0);
118
119 /* Deliver the signal to userspace */
120 info.si_signo = SIGTRAP;
121 info.si_errno = 0;
122 info.si_code = TRAP_HWBKPT;
123 info.si_addr = (void __user *)address;
124 force_sig_info(SIGTRAP, &info, current);
125}
126#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
127
128/* 103/*
129 * For 600- and 800-family processors, the error_code parameter is DSISR 104 * For 600- and 800-family processors, the error_code parameter is DSISR
130 * for a data fault, SRR1 for an instruction fault. For 400-family processors 105 * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index d664b1bce381..696a5ee4962d 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
1config PPC_MPC52xx 1config PPC_MPC52xx
2 bool "52xx-based boards" 2 bool "52xx-based boards"
3 depends on PPC_MULTIPLATFORM && PPC32 3 depends on PPC_MULTIPLATFORM && PPC32
4 select FSL_SOC
5 select PPC_CLOCK 4 select PPC_CLOCK
6 select PPC_PCI_CHOICE 5 select PPC_PCI_CHOICE
7 6
@@ -48,6 +47,7 @@ config PPC_MPC5200_BUGFIX
48config PPC_MPC5200_GPIO 47config PPC_MPC5200_GPIO
49 bool "MPC5200 GPIO support" 48 bool "MPC5200 GPIO support"
50 depends on PPC_MPC52xx 49 depends on PPC_MPC52xx
51 select HAVE_GPIO_LIB 50 select ARCH_REQUIRE_GPIOLIB
51 select GENERIC_GPIO
52 help 52 help
53 Enable gpiolib support for mpc5200 based boards 53 Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 208005ca262c..e06420af5fe9 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
172 } 172 }
173} 173}
174 174
175static void tce_build_cell(struct iommu_table *tbl, long index, long npages, 175static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
176 unsigned long uaddr, enum dma_data_direction direction, 176 unsigned long uaddr, enum dma_data_direction direction,
177 struct dma_attrs *attrs) 177 struct dma_attrs *attrs)
178{ 178{
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
213 213
214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", 214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
215 index, npages, direction, base_pte); 215 index, npages, direction, base_pte);
216 return 0;
216} 217}
217 218
218static void tce_free_cell(struct iommu_table *tbl, long index, long npages) 219static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
1150 1151
1151static int __init setup_iommu_fixed(char *str) 1152static int __init setup_iommu_fixed(char *str)
1152{ 1153{
1154 struct device_node *pciep;
1155
1153 if (strcmp(str, "off") == 0) 1156 if (strcmp(str, "off") == 0)
1154 iommu_fixed_disabled = 1; 1157 iommu_fixed_disabled = 1;
1155 1158
1156 else if (strcmp(str, "weak") == 0) 1159 /* If we can find a pcie-endpoint in the device tree assume that
1160 * we're on a triblade or a CAB so by default the fixed mapping
1161 * should be set to be weakly ordered; but only if the boot
1162 * option WASN'T set for strong ordering
1163 */
1164 pciep = of_find_node_by_type(NULL, "pcie-endpoint");
1165
1166 if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
1157 iommu_fixed_is_weak = 1; 1167 iommu_fixed_is_weak = 1;
1158 1168
1169 of_node_put(pciep);
1170
1159 return 1; 1171 return 1;
1160} 1172}
1161__setup("iommu_fixed=", setup_iommu_fixed); 1173__setup("iommu_fixed=", setup_iommu_fixed);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 34654743363d..2deeeba7eccf 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,11 +312,28 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
312 */ 312 */
313 node = cpu_to_node(raw_smp_processor_id()); 313 node = cpu_to_node(raw_smp_processor_id());
314 for (n = 0; n < MAX_NUMNODES; n++, node++) { 314 for (n = 0; n < MAX_NUMNODES; n++, node++) {
315 int available_spus;
316
315 node = (node < MAX_NUMNODES) ? node : 0; 317 node = (node < MAX_NUMNODES) ? node : 0;
316 if (!node_allowed(ctx, node)) 318 if (!node_allowed(ctx, node))
317 continue; 319 continue;
320
321 available_spus = 0;
318 mutex_lock(&cbe_spu_info[node].list_mutex); 322 mutex_lock(&cbe_spu_info[node].list_mutex);
319 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 323 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
324 if (spu->ctx && spu->ctx->gang
325 && spu->ctx->aff_offset == 0)
326 available_spus -=
327 (spu->ctx->gang->contexts - 1);
328 else
329 available_spus++;
330 }
331 if (available_spus < ctx->gang->contexts) {
332 mutex_unlock(&cbe_spu_info[node].list_mutex);
333 continue;
334 }
335
336 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
320 if ((!mem_aff || spu->has_mem_affinity) && 337 if ((!mem_aff || spu->has_mem_affinity) &&
321 sched_spu(spu)) { 338 sched_spu(spu)) {
322 mutex_unlock(&cbe_spu_info[node].list_mutex); 339 mutex_unlock(&cbe_spu_info[node].list_mutex);
@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
389 if (list_empty(&ctx->aff_list)) 406 if (list_empty(&ctx->aff_list))
390 return 0; 407 return 0;
391 408
409 if (atomic_read(&ctx->gang->aff_sched_count) == 0)
410 ctx->gang->aff_ref_spu = NULL;
411
392 if (!gang->aff_ref_spu) { 412 if (!gang->aff_ref_spu) {
393 if (!(gang->aff_flags & AFF_MERGED)) 413 if (!(gang->aff_flags & AFF_MERGED))
394 aff_merge_remaining_ctxs(gang); 414 aff_merge_remaining_ctxs(gang);
@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
416 if (spu->ctx->flags & SPU_CREATE_NOSCHED) 436 if (spu->ctx->flags & SPU_CREATE_NOSCHED)
417 atomic_dec(&cbe_spu_info[spu->node].reserved_spus); 437 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
418 438
419 if (ctx->gang){ 439 if (ctx->gang)
420 mutex_lock(&ctx->gang->aff_mutex); 440 atomic_dec_if_positive(&ctx->gang->aff_sched_count);
421 if (has_affinity(ctx)) {
422 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
423 ctx->gang->aff_ref_spu = NULL;
424 }
425 mutex_unlock(&ctx->gang->aff_mutex);
426 }
427 441
428 spu_switch_notify(spu, NULL); 442 spu_switch_notify(spu, NULL);
429 spu_unmap_mappings(ctx); 443 spu_unmap_mappings(ctx);
@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
562 goto found; 576 goto found;
563 mutex_unlock(&cbe_spu_info[node].list_mutex); 577 mutex_unlock(&cbe_spu_info[node].list_mutex);
564 578
565 mutex_lock(&ctx->gang->aff_mutex); 579 atomic_dec(&ctx->gang->aff_sched_count);
566 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
567 ctx->gang->aff_ref_spu = NULL;
568 mutex_unlock(&ctx->gang->aff_mutex);
569 goto not_found; 580 goto not_found;
570 } 581 }
571 mutex_unlock(&ctx->gang->aff_mutex); 582 mutex_unlock(&ctx->gang->aff_mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 8c0e95766a62..92d20e993ede 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
196 struct proc_dir_entry *entry; 196 struct proc_dir_entry *entry;
197 int i, error = -ENOMEM; 197 int i, error = -ENOMEM;
198 198
199 sputrace_log = kcalloc(sizeof(struct sputrace), 199 sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
200 bufsize, GFP_KERNEL);
201 if (!sputrace_log) 200 if (!sputrace_log)
202 goto out; 201 goto out;
203 202
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index bc818e4e2033..bb464d1211b2 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
41#include <asm/iseries/hv_call_event.h> 41#include <asm/iseries/hv_call_event.h>
42#include <asm/iseries/iommu.h> 42#include <asm/iseries/iommu.h>
43 43
44static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, 44static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
45 unsigned long uaddr, enum dma_data_direction direction, 45 unsigned long uaddr, enum dma_data_direction direction,
46 struct dma_attrs *attrs) 46 struct dma_attrs *attrs)
47{ 47{
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
71 index++; 71 index++;
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 } 73 }
74 return 0;
74} 75}
75 76
76static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) 77static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 70541b7a5013..a0ff03a3d8da 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
83static struct iommu_table iommu_table_iobmap; 83static struct iommu_table iommu_table_iobmap;
84static int iommu_table_iobmap_inited; 84static int iommu_table_iobmap_inited;
85 85
86static void iobmap_build(struct iommu_table *tbl, long index, 86static int iobmap_build(struct iommu_table *tbl, long index,
87 long npages, unsigned long uaddr, 87 long npages, unsigned long uaddr,
88 enum dma_data_direction direction, 88 enum dma_data_direction direction,
89 struct dma_attrs *attrs) 89 struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
108 uaddr += IOBMAP_PAGE_SIZE; 108 uaddr += IOBMAP_PAGE_SIZE;
109 bus_addr += IOBMAP_PAGE_SIZE; 109 bus_addr += IOBMAP_PAGE_SIZE;
110 } 110 }
111 return 0;
111} 112}
112 113
113 114
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 757c0296e0b8..97619fd51e39 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
40 depends on PPC_PSERIES && PPC_EARLY_DEBUG 40 depends on PPC_PSERIES && PPC_EARLY_DEBUG
41 bool "Enable extra debug logging in platforms/pseries" 41 bool "Enable extra debug logging in platforms/pseries"
42 default y 42 default y
43
44config PPC_SMLPAR
45 bool "Support for shared-memory logical partitions"
46 depends on PPC_PSERIES
47 select LPARCFG
48 default n
49 help
50 Select this option to enable shared memory partition support.
51 With this option a system running in an LPAR can be given more
52 memory than physically available and will allow firmware to
53 balance memory across many LPARs.
54
55config CMM
56 tristate "Collaborative memory management"
57 depends on PPC_SMLPAR
58 default y
59 help
60 Select this option, if you want to enable the kernel interface
61 to reduce the memory size of the system. This is accomplished
62 by allocating pages of memory and put them "on hold". This only
63 makes sense for a system running in an LPAR where the unused pages
64 will be reused for other LPARs. The interface allows firmware to
65 balance memory across many LPARs.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 554c6e42ef2a..dfe574af2dc0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
24obj-$(CONFIG_HVCS) += hvcserver.o 24obj-$(CONFIG_HVCS) += hvcserver.o
25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o 25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o 26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
27obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 000000000000..c6b3be03168b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
1/*
2 * Collaborative memory management interface.
3 *
4 * Copyright (C) 2008 IBM Corporation
5 * Author(s): Brian King (brking@linux.vnet.ibm.com),
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <linux/ctype.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/init.h>
28#include <linux/kthread.h>
29#include <linux/module.h>
30#include <linux/oom.h>
31#include <linux/sched.h>
32#include <linux/stringify.h>
33#include <linux/swap.h>
34#include <linux/sysdev.h>
35#include <asm/firmware.h>
36#include <asm/hvcall.h>
37#include <asm/mmu.h>
38#include <asm/pgalloc.h>
39#include <asm/uaccess.h>
40
41#include "plpar_wrappers.h"
42
43#define CMM_DRIVER_VERSION "1.0.0"
44#define CMM_DEFAULT_DELAY 1
45#define CMM_DEBUG 0
46#define CMM_DISABLE 0
47#define CMM_OOM_KB 1024
48#define CMM_MIN_MEM_MB 256
49#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
50#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
51
52static unsigned int delay = CMM_DEFAULT_DELAY;
53static unsigned int oom_kb = CMM_OOM_KB;
54static unsigned int cmm_debug = CMM_DEBUG;
55static unsigned int cmm_disabled = CMM_DISABLE;
56static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
57static struct sys_device cmm_sysdev;
58
59MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
60MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
61MODULE_LICENSE("GPL");
62MODULE_VERSION(CMM_DRIVER_VERSION);
63
64module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
65MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
66 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
67module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
68MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
69 "[Default=" __stringify(CMM_OOM_KB) "]");
70module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
71MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
72 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
73module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
74MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
75 "[Default=" __stringify(CMM_DEBUG) "]");
76
77#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
78
79#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
80
81struct cmm_page_array {
82 struct cmm_page_array *next;
83 unsigned long index;
84 unsigned long page[CMM_NR_PAGES];
85};
86
87static unsigned long loaned_pages;
88static unsigned long loaned_pages_target;
89static unsigned long oom_freed_pages;
90
91static struct cmm_page_array *cmm_page_list;
92static DEFINE_SPINLOCK(cmm_lock);
93
94static struct task_struct *cmm_thread_ptr;
95
96/**
97 * cmm_alloc_pages - Allocate pages and mark them as loaned
98 * @nr: number of pages to allocate
99 *
100 * Return value:
101 * number of pages requested to be allocated which were not
102 **/
103static long cmm_alloc_pages(long nr)
104{
105 struct cmm_page_array *pa, *npa;
106 unsigned long addr;
107 long rc;
108
109 cmm_dbg("Begin request for %ld pages\n", nr);
110
111 while (nr) {
112 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
113 __GFP_NORETRY | __GFP_NOMEMALLOC);
114 if (!addr)
115 break;
116 spin_lock(&cmm_lock);
117 pa = cmm_page_list;
118 if (!pa || pa->index >= CMM_NR_PAGES) {
119 /* Need a new page for the page list. */
120 spin_unlock(&cmm_lock);
121 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
122 __GFP_NORETRY | __GFP_NOMEMALLOC);
123 if (!npa) {
124 pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
125 free_page(addr);
126 break;
127 }
128 spin_lock(&cmm_lock);
129 pa = cmm_page_list;
130
131 if (!pa || pa->index >= CMM_NR_PAGES) {
132 npa->next = pa;
133 npa->index = 0;
134 pa = npa;
135 cmm_page_list = pa;
136 } else
137 free_page((unsigned long) npa);
138 }
139
140 if ((rc = plpar_page_set_loaned(__pa(addr)))) {
141 pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
142 spin_unlock(&cmm_lock);
143 free_page(addr);
144 break;
145 }
146
147 pa->page[pa->index++] = addr;
148 loaned_pages++;
149 totalram_pages--;
150 spin_unlock(&cmm_lock);
151 nr--;
152 }
153
154 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
155 return nr;
156}
157
158/**
159 * cmm_free_pages - Free pages and mark them as active
160 * @nr: number of pages to free
161 *
162 * Return value:
163 * number of pages requested to be freed which were not
164 **/
165static long cmm_free_pages(long nr)
166{
167 struct cmm_page_array *pa;
168 unsigned long addr;
169
170 cmm_dbg("Begin free of %ld pages.\n", nr);
171 spin_lock(&cmm_lock);
172 pa = cmm_page_list;
173 while (nr) {
174 if (!pa || pa->index <= 0)
175 break;
176 addr = pa->page[--pa->index];
177
178 if (pa->index == 0) {
179 pa = pa->next;
180 free_page((unsigned long) cmm_page_list);
181 cmm_page_list = pa;
182 }
183
184 plpar_page_set_active(__pa(addr));
185 free_page(addr);
186 loaned_pages--;
187 nr--;
188 totalram_pages++;
189 }
190 spin_unlock(&cmm_lock);
191 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
192 return nr;
193}
194
195/**
196 * cmm_oom_notify - OOM notifier
197 * @self: notifier block struct
198 * @dummy: not used
199 * @parm: returned - number of pages freed
200 *
201 * Return value:
202 * NOTIFY_OK
203 **/
204static int cmm_oom_notify(struct notifier_block *self,
205 unsigned long dummy, void *parm)
206{
207 unsigned long *freed = parm;
208 long nr = KB2PAGES(oom_kb);
209
210 cmm_dbg("OOM processing started\n");
211 nr = cmm_free_pages(nr);
212 loaned_pages_target = loaned_pages;
213 *freed += KB2PAGES(oom_kb) - nr;
214 oom_freed_pages += KB2PAGES(oom_kb) - nr;
215 cmm_dbg("OOM processing complete\n");
216 return NOTIFY_OK;
217}
218
219/**
220 * cmm_get_mpp - Read memory performance parameters
221 *
222 * Makes hcall to query the current page loan request from the hypervisor.
223 *
224 * Return value:
225 * nothing
226 **/
227static void cmm_get_mpp(void)
228{
229 int rc;
230 struct hvcall_mpp_data mpp_data;
231 unsigned long active_pages_target;
232 signed long page_loan_request;
233
234 rc = h_get_mpp(&mpp_data);
235
236 if (rc != H_SUCCESS)
237 return;
238
239 page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
240 loaned_pages_target = page_loan_request + loaned_pages;
241 if (loaned_pages_target > oom_freed_pages)
242 loaned_pages_target -= oom_freed_pages;
243 else
244 loaned_pages_target = 0;
245
246 active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
247
248 if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
249 loaned_pages_target = totalram_pages + loaned_pages -
250 ((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
251
252 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
253 page_loan_request, loaned_pages, loaned_pages_target,
254 oom_freed_pages, totalram_pages);
255}
256
257static struct notifier_block cmm_oom_nb = {
258 .notifier_call = cmm_oom_notify
259};
260
261/**
262 * cmm_thread - CMM task thread
263 * @dummy: not used
264 *
265 * Return value:
266 * 0
267 **/
268static int cmm_thread(void *dummy)
269{
270 unsigned long timeleft;
271
272 while (1) {
273 timeleft = msleep_interruptible(delay * 1000);
274
275 if (kthread_should_stop() || timeleft) {
276 loaned_pages_target = loaned_pages;
277 break;
278 }
279
280 cmm_get_mpp();
281
282 if (loaned_pages_target > loaned_pages) {
283 if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
284 loaned_pages_target = loaned_pages;
285 } else if (loaned_pages_target < loaned_pages)
286 cmm_free_pages(loaned_pages - loaned_pages_target);
287 }
288 return 0;
289}
290
291#define CMM_SHOW(name, format, args...) \
292 static ssize_t show_##name(struct sys_device *dev, char *buf) \
293 { \
294 return sprintf(buf, format, ##args); \
295 } \
296 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
297
298CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
299CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
300
301static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
302{
303 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
304}
305
306static ssize_t store_oom_pages(struct sys_device *dev,
307 const char *buf, size_t count)
308{
309 unsigned long val = simple_strtoul (buf, NULL, 10);
310
311 if (!capable(CAP_SYS_ADMIN))
312 return -EPERM;
313 if (val != 0)
314 return -EBADMSG;
315
316 oom_freed_pages = 0;
317 return count;
318}
319
320static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
321 show_oom_pages, store_oom_pages);
322
323static struct sysdev_attribute *cmm_attrs[] = {
324 &attr_loaned_kb,
325 &attr_loaned_target_kb,
326 &attr_oom_freed_kb,
327};
328
329static struct sysdev_class cmm_sysdev_class = {
330 .name = "cmm",
331};
332
333/**
334 * cmm_sysfs_register - Register with sysfs
335 *
336 * Return value:
337 * 0 on success / other on failure
338 **/
339static int cmm_sysfs_register(struct sys_device *sysdev)
340{
341 int i, rc;
342
343 if ((rc = sysdev_class_register(&cmm_sysdev_class)))
344 return rc;
345
346 sysdev->id = 0;
347 sysdev->cls = &cmm_sysdev_class;
348
349 if ((rc = sysdev_register(sysdev)))
350 goto class_unregister;
351
352 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
353 if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
354 goto fail;
355 }
356
357 return 0;
358
359fail:
360 while (--i >= 0)
361 sysdev_remove_file(sysdev, cmm_attrs[i]);
362 sysdev_unregister(sysdev);
363class_unregister:
364 sysdev_class_unregister(&cmm_sysdev_class);
365 return rc;
366}
367
368/**
369 * cmm_unregister_sysfs - Unregister from sysfs
370 *
371 **/
372static void cmm_unregister_sysfs(struct sys_device *sysdev)
373{
374 int i;
375
376 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
377 sysdev_remove_file(sysdev, cmm_attrs[i]);
378 sysdev_unregister(sysdev);
379 sysdev_class_unregister(&cmm_sysdev_class);
380}
381
382/**
383 * cmm_init - Module initialization
384 *
385 * Return value:
386 * 0 on success / other on failure
387 **/
388static int cmm_init(void)
389{
390 int rc = -ENOMEM;
391
392 if (!firmware_has_feature(FW_FEATURE_CMO))
393 return -EOPNOTSUPP;
394
395 if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
396 return rc;
397
398 if ((rc = cmm_sysfs_register(&cmm_sysdev)))
399 goto out_oom_notifier;
400
401 if (cmm_disabled)
402 return rc;
403
404 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
405 if (IS_ERR(cmm_thread_ptr)) {
406 rc = PTR_ERR(cmm_thread_ptr);
407 goto out_unregister_sysfs;
408 }
409
410 return rc;
411
412out_unregister_sysfs:
413 cmm_unregister_sysfs(&cmm_sysdev);
414out_oom_notifier:
415 unregister_oom_notifier(&cmm_oom_nb);
416 return rc;
417}
418
419/**
420 * cmm_exit - Module exit
421 *
422 * Return value:
423 * nothing
424 **/
425static void cmm_exit(void)
426{
427 if (cmm_thread_ptr)
428 kthread_stop(cmm_thread_ptr);
429 unregister_oom_notifier(&cmm_oom_nb);
430 cmm_free_pages(loaned_pages);
431 cmm_unregister_sysfs(&cmm_sysdev);
432}
433
434/**
435 * cmm_set_disable - Disable/Enable CMM
436 *
437 * Return value:
438 * 0 on success / other on failure
439 **/
440static int cmm_set_disable(const char *val, struct kernel_param *kp)
441{
442 int disable = simple_strtoul(val, NULL, 10);
443
444 if (disable != 0 && disable != 1)
445 return -EINVAL;
446
447 if (disable && !cmm_disabled) {
448 if (cmm_thread_ptr)
449 kthread_stop(cmm_thread_ptr);
450 cmm_thread_ptr = NULL;
451 cmm_free_pages(loaned_pages);
452 } else if (!disable && cmm_disabled) {
453 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
454 if (IS_ERR(cmm_thread_ptr))
455 return PTR_ERR(cmm_thread_ptr);
456 }
457
458 cmm_disabled = disable;
459 return 0;
460}
461
462module_param_call(disable, cmm_set_disable, param_get_uint,
463 &cmm_disabled, S_IRUGO | S_IWUSR);
464MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
465 "[Default=" __stringify(CMM_DISABLE) "]");
466
467module_init(cmm_init);
468module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 5377dd4b849a..a8c446697f9e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49 49
50 50
51static void tce_build_pSeries(struct iommu_table *tbl, long index, 51static int tce_build_pSeries(struct iommu_table *tbl, long index,
52 long npages, unsigned long uaddr, 52 long npages, unsigned long uaddr,
53 enum dma_data_direction direction, 53 enum dma_data_direction direction,
54 struct dma_attrs *attrs) 54 struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 tcep++; 73 tcep++;
74 } 74 }
75 return 0;
75} 76}
76 77
77 78
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
94 return *tcep; 95 return *tcep;
95} 96}
96 97
97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, 98static void tce_free_pSeriesLP(struct iommu_table*, long, long);
99static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
100
101static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
98 long npages, unsigned long uaddr, 102 long npages, unsigned long uaddr,
99 enum dma_data_direction direction, 103 enum dma_data_direction direction,
100 struct dma_attrs *attrs) 104 struct dma_attrs *attrs)
101{ 105{
102 u64 rc; 106 u64 rc = 0;
103 u64 proto_tce, tce; 107 u64 proto_tce, tce;
104 u64 rpn; 108 u64 rpn;
109 int ret = 0;
110 long tcenum_start = tcenum, npages_start = npages;
105 111
106 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; 112 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
107 proto_tce = TCE_PCI_READ; 113 proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
112 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; 118 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
113 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); 119 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
114 120
121 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
122 ret = (int)rc;
123 tce_free_pSeriesLP(tbl, tcenum_start,
124 (npages_start - (npages + 1)));
125 break;
126 }
127
115 if (rc && printk_ratelimit()) { 128 if (rc && printk_ratelimit()) {
116 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 129 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
117 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 130 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
123 tcenum++; 136 tcenum++;
124 rpn++; 137 rpn++;
125 } 138 }
139 return ret;
126} 140}
127 141
128static DEFINE_PER_CPU(u64 *, tce_page) = NULL; 142static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
129 143
130static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 144static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
131 long npages, unsigned long uaddr, 145 long npages, unsigned long uaddr,
132 enum dma_data_direction direction, 146 enum dma_data_direction direction,
133 struct dma_attrs *attrs) 147 struct dma_attrs *attrs)
134{ 148{
135 u64 rc; 149 u64 rc = 0;
136 u64 proto_tce; 150 u64 proto_tce;
137 u64 *tcep; 151 u64 *tcep;
138 u64 rpn; 152 u64 rpn;
139 long l, limit; 153 long l, limit;
154 long tcenum_start = tcenum, npages_start = npages;
155 int ret = 0;
140 156
141 if (npages == 1) { 157 if (npages == 1) {
142 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 158 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
143 direction, attrs); 159 direction, attrs);
144 return;
145 } 160 }
146 161
147 tcep = __get_cpu_var(tce_page); 162 tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
153 tcep = (u64 *)__get_free_page(GFP_ATOMIC); 168 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
154 /* If allocation fails, fall back to the loop implementation */ 169 /* If allocation fails, fall back to the loop implementation */
155 if (!tcep) { 170 if (!tcep) {
156 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 171 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
157 direction, attrs); 172 direction, attrs);
158 return;
159 } 173 }
160 __get_cpu_var(tce_page) = tcep; 174 __get_cpu_var(tce_page) = tcep;
161 } 175 }
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
187 tcenum += limit; 201 tcenum += limit;
188 } while (npages > 0 && !rc); 202 } while (npages > 0 && !rc);
189 203
204 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
205 ret = (int)rc;
206 tce_freemulti_pSeriesLP(tbl, tcenum_start,
207 (npages_start - (npages + limit)));
208 return ret;
209 }
210
190 if (rc && printk_ratelimit()) { 211 if (rc && printk_ratelimit()) {
191 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 212 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
192 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 213 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
194 printk("\ttce[0] val = 0x%lx\n", tcep[0]); 215 printk("\ttce[0] val = 0x%lx\n", tcep[0]);
195 show_stack(current, (unsigned long *)__get_SP()); 216 show_stack(current, (unsigned long *)__get_SP());
196 } 217 }
218 return ret;
197} 219}
198 220
199static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) 221static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d8680b589dc9..a437267c6bf8 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
42 return vpa_call(0x3, cpu, vpa); 42 return vpa_call(0x3, cpu, vpa);
43} 43}
44 44
45static inline long plpar_page_set_loaned(unsigned long vpa)
46{
47 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
48}
49
50static inline long plpar_page_set_active(unsigned long vpa)
51{
52 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
53}
54
45extern void vpa_init(int cpu); 55extern void vpa_init(int cpu);
46 56
47static inline long plpar_pte_enter(unsigned long flags, 57static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 90beb444e1dd..063a0d2fba30 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
314 H_DABRX_KERNEL | H_DABRX_USER); 314 H_DABRX_KERNEL | H_DABRX_USER);
315} 315}
316 316
317#define CMO_CHARACTERISTICS_TOKEN 44
318#define CMO_MAXLENGTH 1026
319
320/**
321 * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
322 * handle that here. (Stolen from parse_system_parameter_string)
323 */
324void pSeries_cmo_feature_init(void)
325{
326 char *ptr, *key, *value, *end;
327 int call_status;
328 int PrPSP = -1;
329 int SecPSP = -1;
330
331 pr_debug(" -> fw_cmo_feature_init()\n");
332 spin_lock(&rtas_data_buf_lock);
333 memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
334 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
335 NULL,
336 CMO_CHARACTERISTICS_TOKEN,
337 __pa(rtas_data_buf),
338 RTAS_DATA_BUF_SIZE);
339
340 if (call_status != 0) {
341 spin_unlock(&rtas_data_buf_lock);
342 pr_debug("CMO not available\n");
343 pr_debug(" <- fw_cmo_feature_init()\n");
344 return;
345 }
346
347 end = rtas_data_buf + CMO_MAXLENGTH - 2;
348 ptr = rtas_data_buf + 2; /* step over strlen value */
349 key = value = ptr;
350
351 while (*ptr && (ptr <= end)) {
352 /* Separate the key and value by replacing '=' with '\0' and
353 * point the value at the string after the '='
354 */
355 if (ptr[0] == '=') {
356 ptr[0] = '\0';
357 value = ptr + 1;
358 } else if (ptr[0] == '\0' || ptr[0] == ',') {
359 /* Terminate the string containing the key/value pair */
360 ptr[0] = '\0';
361
362 if (key == value) {
363 pr_debug("Malformed key/value pair\n");
364 /* Never found a '=', end processing */
365 break;
366 }
367
368 if (0 == strcmp(key, "PrPSP"))
369 PrPSP = simple_strtol(value, NULL, 10);
370 else if (0 == strcmp(key, "SecPSP"))
371 SecPSP = simple_strtol(value, NULL, 10);
372 value = key = ptr + 1;
373 }
374 ptr++;
375 }
376
377 if (PrPSP != -1 || SecPSP != -1) {
378 pr_info("CMO enabled\n");
379 pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
380 powerpc_firmware_features |= FW_FEATURE_CMO;
381 } else
382 pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
383 spin_unlock(&rtas_data_buf_lock);
384 pr_debug(" <- fw_cmo_feature_init()\n");
385}
386
317/* 387/*
318 * Early initialization. Relocation is on but do not reference unbolted pages 388 * Early initialization. Relocation is on but do not reference unbolted pages
319 */ 389 */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
329 else if (firmware_has_feature(FW_FEATURE_XDABR)) 399 else if (firmware_has_feature(FW_FEATURE_XDABR))
330 ppc_md.set_dabr = pseries_set_xdabr; 400 ppc_md.set_dabr = pseries_set_xdabr;
331 401
402 pSeries_cmo_feature_init();
332 iommu_init_early_pSeries(); 403 iommu_init_early_pSeries();
333 404
334 pr_debug(" <- pSeries_init_early()\n"); 405 pr_debug(" <- pSeries_init_early()\n");
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index de8c8b542cfa..89639ecbf381 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
147 } 147 }
148} 148}
149 149
150static void dart_build(struct iommu_table *tbl, long index, 150static int dart_build(struct iommu_table *tbl, long index,
151 long npages, unsigned long uaddr, 151 long npages, unsigned long uaddr,
152 enum dma_data_direction direction, 152 enum dma_data_direction direction,
153 struct dma_attrs *attrs) 153 struct dma_attrs *attrs)
@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
184 } else { 184 } else {
185 dart_dirty = 1; 185 dart_dirty = 1;
186 } 186 }
187 return 0;
187} 188}
188 189
189 190
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 4bb18f57901e..1ce546462be5 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -29,7 +29,7 @@ config QE_GPIO
29 bool "QE GPIO support" 29 bool "QE GPIO support"
30 depends on QUICC_ENGINE 30 depends on QUICC_ENGINE
31 select GENERIC_GPIO 31 select GENERIC_GPIO
32 select HAVE_GPIO_LIB 32 select ARCH_REQUIRE_GPIOLIB
33 help 33 help
34 Say Y here if you're going to use hardware that connects to the 34 Say Y here if you're going to use hardware that connects to the
35 QE GPIOs. 35 QE GPIOs.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eb530b4128ba..2ed88122be93 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)"
565 depends on 64BIT && EXPERIMENTAL 565 depends on 64BIT && EXPERIMENTAL
566 select VIRTIO 566 select VIRTIO
567 select VIRTIO_RING 567 select VIRTIO_RING
568 select VIRTIO_CONSOLE
568 help 569 help
569 Select this option if you want to run the kernel under s390 linux 570 Select this option if you want to run the kernel under s390 linux
570endmenu 571endmenu
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad490a6dd..4f82e5b5f879 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11); 270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
271} 271}
272 272
273/* Called with kretprobe_lock held */
274void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 273void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
275 struct pt_regs *regs) 274 struct pt_regs *regs)
276{ 275{
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
377 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 376 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
378 377
379 INIT_HLIST_HEAD(&empty_rp); 378 INIT_HLIST_HEAD(&empty_rp);
380 spin_lock_irqsave(&kretprobe_lock, flags); 379 kretprobe_hash_lock(current, &head, &flags);
381 head = kretprobe_inst_table_head(current);
382 380
383 /* 381 /*
384 * It is possible to have multiple instances associated with a given 382 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
417 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; 415 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
418 416
419 reset_current_kprobe(); 417 reset_current_kprobe();
420 spin_unlock_irqrestore(&kretprobe_lock, flags); 418 kretprobe_hash_unlock(current, &flags);
421 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
422 420
423 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 421 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b358e18273b0..62122bad1e33 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
54#include <asm/sections.h> 54#include <asm/sections.h>
55#include <asm/ebcdic.h> 55#include <asm/ebcdic.h>
56#include <asm/compat.h> 56#include <asm/compat.h>
57#include <asm/kvm_virtio.h>
57 58
58long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | 59long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
59 PSW_MASK_MCHECK | PSW_DEFAULT_KEY); 60 PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -766,7 +767,8 @@ setup_arch(char **cmdline_p)
766 printk("We are running under VM (64 bit mode)\n"); 767 printk("We are running under VM (64 bit mode)\n");
767 else if (MACHINE_IS_KVM) { 768 else if (MACHINE_IS_KVM) {
768 printk("We are running under KVM (64 bit mode)\n"); 769 printk("We are running under KVM (64 bit mode)\n");
769 add_preferred_console("ttyS", 1, NULL); 770 add_preferred_console("hvc", 0, NULL);
771 s390_virtio_console_init();
770 } else 772 } else
771 printk("We are running native (64 bit mode)\n"); 773 printk("We are running native (64 bit mode)\n");
772#endif /* CONFIG_64BIT */ 774#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 212d618b0095..632b13e10053 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -9,7 +9,6 @@
9#include <linux/device.h> 9#include <linux/device.h>
10#include <linux/bootmem.h> 10#include <linux/bootmem.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/kthread.h>
13#include <linux/workqueue.h> 12#include <linux/workqueue.h>
14#include <linux/cpu.h> 13#include <linux/cpu.h>
15#include <linux/smp.h> 14#include <linux/smp.h>
@@ -230,20 +229,9 @@ void arch_update_cpu_topology(void)
230 } 229 }
231} 230}
232 231
233static int topology_kthread(void *data)
234{
235 arch_reinit_sched_domains();
236 return 0;
237}
238
239static void topology_work_fn(struct work_struct *work) 232static void topology_work_fn(struct work_struct *work)
240{ 233{
241 /* We can't call arch_reinit_sched_domains() from a multi-threaded 234 arch_reinit_sched_domains();
242 * workqueue context since it may deadlock in case of cpu hotplug.
243 * So we have to create a kernel thread in order to call
244 * arch_reinit_sched_domains().
245 */
246 kthread_run(topology_kthread, NULL, "topology_update");
247} 235}
248 236
249void topology_schedule_update(void) 237void topology_schedule_update(void)
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index adcea31e663e..f386997e4d9c 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -74,8 +74,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
74static int fill_inbuf(void); 74static int fill_inbuf(void);
75static void flush_window(void); 75static void flush_window(void);
76static void error(char *m); 76static void error(char *m);
77static void gzip_mark(void **);
78static void gzip_release(void **);
79 77
80extern char input_data[]; 78extern char input_data[];
81extern int input_len; 79extern int input_len;
@@ -84,11 +82,7 @@ static long bytes_out = 0;
84static uch *output_data; 82static uch *output_data;
85static unsigned long output_ptr = 0; 83static unsigned long output_ptr = 0;
86 84
87static void *malloc(int size);
88static void free(void *where);
89static void error(char *m); 85static void error(char *m);
90static void gzip_mark(void **);
91static void gzip_release(void **);
92 86
93int puts(const char *); 87int puts(const char *);
94 88
@@ -101,38 +95,6 @@ static unsigned long free_mem_end_ptr;
101 95
102#include "../../../../lib/inflate.c" 96#include "../../../../lib/inflate.c"
103 97
104static void *malloc(int size)
105{
106 void *p;
107
108 if (size <0) error("Malloc error");
109 if (free_mem_ptr == 0) error("Memory error");
110
111 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
112
113 p = (void *)free_mem_ptr;
114 free_mem_ptr += size;
115
116 if (free_mem_ptr >= free_mem_end_ptr)
117 error("Out of memory");
118
119 return p;
120}
121
122static void free(void *where)
123{ /* Don't care */
124}
125
126static void gzip_mark(void **ptr)
127{
128 *ptr = (void *) free_mem_ptr;
129}
130
131static void gzip_release(void **ptr)
132{
133 free_mem_ptr = (long) *ptr;
134}
135
136#ifdef CONFIG_SH_STANDARD_BIOS 98#ifdef CONFIG_SH_STANDARD_BIOS
137size_t strlen(const char *s) 99size_t strlen(const char *s)
138{ 100{
diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c
index a006ef89b9dd..2941657e18aa 100644
--- a/arch/sh/boot/compressed/misc_64.c
+++ b/arch/sh/boot/compressed/misc_64.c
@@ -72,8 +72,6 @@ static unsigned outcnt = 0; /* bytes in output buffer */
72static int fill_inbuf(void); 72static int fill_inbuf(void);
73static void flush_window(void); 73static void flush_window(void);
74static void error(char *m); 74static void error(char *m);
75static void gzip_mark(void **);
76static void gzip_release(void **);
77 75
78extern char input_data[]; 76extern char input_data[];
79extern int input_len; 77extern int input_len;
@@ -82,11 +80,7 @@ static long bytes_out = 0;
82static uch *output_data; 80static uch *output_data;
83static unsigned long output_ptr = 0; 81static unsigned long output_ptr = 0;
84 82
85static void *malloc(int size);
86static void free(void *where);
87static void error(char *m); 83static void error(char *m);
88static void gzip_mark(void **);
89static void gzip_release(void **);
90 84
91static void puts(const char *); 85static void puts(const char *);
92 86
@@ -99,40 +93,6 @@ static unsigned long free_mem_end_ptr;
99 93
100#include "../../../../lib/inflate.c" 94#include "../../../../lib/inflate.c"
101 95
102static void *malloc(int size)
103{
104 void *p;
105
106 if (size < 0)
107 error("Malloc error\n");
108 if (free_mem_ptr == 0)
109 error("Memory error\n");
110
111 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
112
113 p = (void *) free_mem_ptr;
114 free_mem_ptr += size;
115
116 if (free_mem_ptr >= free_mem_end_ptr)
117 error("\nOut of memory\n");
118
119 return p;
120}
121
122static void free(void *where)
123{ /* Don't care */
124}
125
126static void gzip_mark(void **ptr)
127{
128 *ptr = (void *) free_mem_ptr;
129}
130
131static void gzip_release(void **ptr)
132{
133 free_mem_ptr = (long) *ptr;
134}
135
136void puts(const char *s) 96void puts(const char *s)
137{ 97{
138} 98}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 789724e61e83..375de7c6d082 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -298,20 +298,6 @@ config UNIX98_PTYS
298 Read the instructions in <file:Documentation/Changes> pertaining to 298 Read the instructions in <file:Documentation/Changes> pertaining to
299 pseudo terminals. It's safe to say N. 299 pseudo terminals. It's safe to say N.
300 300
301config UNIX98_PTY_COUNT
302 int "Maximum number of Unix98 PTYs in use (0-2048)"
303 depends on UNIX98_PTYS
304 default "256"
305 help
306 The maximum number of Unix98 PTYs that can be used at any one time.
307 The default is 256, and should be enough for desktop systems. Server
308 machines which support incoming telnet/rlogin/ssh connections and/or
309 serve several X terminals may want to increase this: every incoming
310 connection and every xterm uses up one PTY.
311
312 When not in use, each additional set of 256 PTYs occupy
313 approximately 8 KB of kernel memory on 32-bit architectures.
314
315endmenu 301endmenu
316 302
317source "fs/Kconfig" 303source "fs/Kconfig"
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d755354..201a6e547e4a 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
478 return 0; 478 return 0;
479} 479}
480 480
481/* Called with kretprobe_lock held. The value stored in the return 481/* The value stored in the return address register is actually 2
482 * address register is actually 2 instructions before where the 482 * instructions before where the callee will return to.
483 * callee will return to. Sequences usually look something like this 483 * Sequences usually look something like this
484 * 484 *
485 * call some_function <--- return register points here 485 * call some_function <--- return register points here
486 * nop <--- call delay slot 486 * nop <--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
512 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 512 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
513 513
514 INIT_HLIST_HEAD(&empty_rp); 514 INIT_HLIST_HEAD(&empty_rp);
515 spin_lock_irqsave(&kretprobe_lock, flags); 515 kretprobe_hash_lock(current, &head, &flags);
516 head = kretprobe_inst_table_head(current);
517 516
518 /* 517 /*
519 * It is possible to have multiple instances associated with a given 518 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
553 regs->tnpc = orig_ret_address + 4; 552 regs->tnpc = orig_ret_address + 4;
554 553
555 reset_current_kprobe(); 554 reset_current_kprobe();
556 spin_unlock_irqrestore(&kretprobe_lock, flags); 555 kretprobe_hash_unlock(current, &flags);
557 preempt_enable_no_resched(); 556 preempt_enable_no_resched();
558 557
559 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 558 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b2ddfcf01728..e3cba0b45600 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,11 +23,13 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_IOREMAP_PROT 24 select HAVE_IOREMAP_PROT
25 select HAVE_KPROBES 25 select HAVE_KPROBES
26 select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
26 select HAVE_KRETPROBES 27 select HAVE_KRETPROBES
27 select HAVE_DYNAMIC_FTRACE 28 select HAVE_DYNAMIC_FTRACE
28 select HAVE_FTRACE 29 select HAVE_FTRACE
29 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 30 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
30 select HAVE_ARCH_KGDB if !X86_VOYAGER 31 select HAVE_ARCH_KGDB if !X86_VOYAGER
32 select HAVE_EFFICIENT_UNALIGNED_ACCESS
31 33
32config ARCH_DEFCONFIG 34config ARCH_DEFCONFIG
33 string 35 string
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index bc5553b496f7..9fea73706479 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@ static unsigned outcnt;
182static int fill_inbuf(void); 182static int fill_inbuf(void);
183static void flush_window(void); 183static void flush_window(void);
184static void error(char *m); 184static void error(char *m);
185static void gzip_mark(void **);
186static void gzip_release(void **);
187 185
188/* 186/*
189 * This is set up by the setup-routine at boot-time 187 * This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@ extern int input_len;
196 194
197static long bytes_out; 195static long bytes_out;
198 196
199static void *malloc(int size);
200static void free(void *where);
201
202static void *memset(void *s, int c, unsigned n); 197static void *memset(void *s, int c, unsigned n);
203static void *memcpy(void *dest, const void *src, unsigned n); 198static void *memcpy(void *dest, const void *src, unsigned n);
204 199
@@ -220,40 +215,6 @@ static int lines, cols;
220 215
221#include "../../../../lib/inflate.c" 216#include "../../../../lib/inflate.c"
222 217
223static void *malloc(int size)
224{
225 void *p;
226
227 if (size < 0)
228 error("Malloc error");
229 if (free_mem_ptr <= 0)
230 error("Memory error");
231
232 free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
233
234 p = (void *)free_mem_ptr;
235 free_mem_ptr += size;
236
237 if (free_mem_ptr >= free_mem_end_ptr)
238 error("Out of memory");
239
240 return p;
241}
242
243static void free(void *where)
244{ /* Don't care */
245}
246
247static void gzip_mark(void **ptr)
248{
249 *ptr = (void *) free_mem_ptr;
250}
251
252static void gzip_release(void **ptr)
253{
254 free_mem_ptr = (memptr) *ptr;
255}
256
257static void scroll(void) 218static void scroll(void)
258{ 219{
259 int i; 220 int i;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
431 regs->ip = (unsigned long)p->ainsn.insn; 431 regs->ip = (unsigned long)p->ainsn.insn;
432} 432}
433 433
434/* Called with kretprobe_lock held */
435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 434void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
436 struct pt_regs *regs) 435 struct pt_regs *regs)
437{ 436{
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
682 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 681 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
683 682
684 INIT_HLIST_HEAD(&empty_rp); 683 INIT_HLIST_HEAD(&empty_rp);
685 spin_lock_irqsave(&kretprobe_lock, flags); 684 kretprobe_hash_lock(current, &head, &flags);
686 head = kretprobe_inst_table_head(current);
687 /* fixup registers */ 685 /* fixup registers */
688#ifdef CONFIG_X86_64 686#ifdef CONFIG_X86_64
689 regs->cs = __KERNEL_CS; 687 regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
732 730
733 kretprobe_assert(ri, orig_ret_address, trampoline_address); 731 kretprobe_assert(ri, orig_ret_address, trampoline_address);
734 732
735 spin_unlock_irqrestore(&kretprobe_lock, flags); 733 kretprobe_hash_unlock(current, &flags);
736 734
737 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 735 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
738 hlist_del(&ri->hlist); 736 hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7c..19e7fc7c2c4f 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/spinlock.h> 30#include <linux/spinlock.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/crash_dump.h>
32#include <linux/dma-mapping.h> 33#include <linux/dma-mapping.h>
33#include <linux/bitops.h> 34#include <linux/bitops.h>
34#include <linux/pci_ids.h> 35#include <linux/pci_ids.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
167static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); 168static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
168static void calioc2_tce_cache_blast(struct iommu_table *tbl); 169static void calioc2_tce_cache_blast(struct iommu_table *tbl);
169static void calioc2_dump_error_regs(struct iommu_table *tbl); 170static void calioc2_dump_error_regs(struct iommu_table *tbl);
171static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
172static void get_tce_space_from_tar(void);
170 173
171static struct cal_chipset_ops calgary_chip_ops = { 174static struct cal_chipset_ops calgary_chip_ops = {
172 .handle_quirks = calgary_handle_quirks, 175 .handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
830 833
831 tbl = pci_iommu(dev->bus); 834 tbl = pci_iommu(dev->bus);
832 tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; 835 tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
833 tce_free(tbl, 0, tbl->it_size); 836
837 if (is_kdump_kernel())
838 calgary_init_bitmap_from_tce_table(tbl);
839 else
840 tce_free(tbl, 0, tbl->it_size);
834 841
835 if (is_calgary(dev->device)) 842 if (is_calgary(dev->device))
836 tbl->chip_ops = &calgary_chip_ops; 843 tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
1209 if (ret) 1216 if (ret)
1210 return ret; 1217 return ret;
1211 1218
1219 /* Purely for kdump kernel case */
1220 if (is_kdump_kernel())
1221 get_tce_space_from_tar();
1222
1212 do { 1223 do {
1213 dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); 1224 dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
1214 if (!dev) 1225 if (!dev)
@@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
1339 return (val != 0xffffffff); 1350 return (val != 0xffffffff);
1340} 1351}
1341 1352
1353/*
1354 * calgary_init_bitmap_from_tce_table():
1355 * Funtion for kdump case. In the second/kdump kernel initialize
1356 * the bitmap based on the tce table entries obtained from first kernel
1357 */
1358static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1359{
1360 u64 *tp;
1361 unsigned int index;
1362 tp = ((u64 *)tbl->it_base);
1363 for (index = 0 ; index < tbl->it_size; index++) {
1364 if (*tp != 0x0)
1365 set_bit(index, tbl->it_map);
1366 tp++;
1367 }
1368}
1369
1370/*
1371 * get_tce_space_from_tar():
1372 * Function for kdump case. Get the tce tables from first kernel
1373 * by reading the contents of the base adress register of calgary iommu
1374 */
1375static void get_tce_space_from_tar()
1376{
1377 int bus;
1378 void __iomem *target;
1379 unsigned long tce_space;
1380
1381 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
1382 struct calgary_bus_info *info = &bus_info[bus];
1383 unsigned short pci_device;
1384 u32 val;
1385
1386 val = read_pci_config(bus, 0, 0, 0);
1387 pci_device = (val & 0xFFFF0000) >> 16;
1388
1389 if (!is_cal_pci_dev(pci_device))
1390 continue;
1391 if (info->translation_disabled)
1392 continue;
1393
1394 if (calgary_bus_has_devices(bus, pci_device) ||
1395 translate_empty_slots) {
1396 target = calgary_reg(bus_info[bus].bbar,
1397 tar_offset(bus));
1398 tce_space = be64_to_cpu(readq(target));
1399 tce_space = tce_space & TAR_SW_BITS;
1400
1401 tce_space = tce_space & (~specified_table_size);
1402 info->tce_space = (u64 *)__va(tce_space);
1403 }
1404 }
1405 return;
1406}
1407
1342void __init detect_calgary(void) 1408void __init detect_calgary(void)
1343{ 1409{
1344 int bus; 1410 int bus;
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
1394 return; 1460 return;
1395 } 1461 }
1396 1462
1397 specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); 1463 specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
1464 saved_max_pfn : max_pfn) * PAGE_SIZE);
1398 1465
1399 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 1466 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
1400 struct calgary_bus_info *info = &bus_info[bus]; 1467 struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
1412 1479
1413 if (calgary_bus_has_devices(bus, pci_device) || 1480 if (calgary_bus_has_devices(bus, pci_device) ||
1414 translate_empty_slots) { 1481 translate_empty_slots) {
1415 tbl = alloc_tce_table(); 1482 /*
1416 if (!tbl) 1483 * If it is kdump kernel, find and use tce tables
1417 goto cleanup; 1484 * from first kernel, else allocate tce tables here
1418 info->tce_space = tbl; 1485 */
1486 if (!is_kdump_kernel()) {
1487 tbl = alloc_tce_table();
1488 if (!tbl)
1489 goto cleanup;
1490 info->tce_space = tbl;
1491 }
1419 calgary_found = 1; 1492 calgary_found = 1;
1420 } 1493 }
1421 } 1494 }