aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-25 14:08:17 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-25 14:08:17 -0400
commit5047887caf1806f31652210df27fb62a7c43f27d (patch)
tree4098ead40c1aa7b904167f67cff87a247cfa0b6c
parent996abf053eec4d67136be8b911bbaaf989cfb99c (diff)
parent973b7d83ebeb1e34b8bee69208916e5f0e2353c3 (diff)
Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (34 commits) powerpc: Wireup new syscalls Move update_mmu_cache() declaration from tlbflush.h to pgtable.h powerpc/pseries: Remove kmalloc call in handling writes to lparcfg powerpc/pseries: Update arch vector to indicate support for CMO ibmvfc: Add support for collaborative memory overcommit ibmvscsi: driver enablement for CMO ibmveth: enable driver for CMO ibmveth: Automatically enable larger rx buffer pools for larger mtu powerpc/pseries: Verify CMO memory entitlement updates with virtual I/O powerpc/pseries: vio bus support for CMO powerpc/pseries: iommu enablement for CMO powerpc/pseries: Add CMO paging statistics powerpc/pseries: Add collaborative memory manager powerpc/pseries: Utilities to set firmware page state powerpc/pseries: Enable CMO feature during platform setup powerpc/pseries: Split retrieval of processor entitlement data into a helper routine powerpc/pseries: Add memory entitlement capabilities to /proc/ppc64/lparcfg powerpc/pseries: Split processor entitlement retrieval and gathering to helper routines powerpc/pseries: Remove extraneous error reporting for hcall failures in lparcfg powerpc: Fix compile error with binutils 2.15 ... Fixed up conflict in arch/powerpc/platforms/52xx/Kconfig manually.
-rw-r--r--Documentation/powerpc/booting-without-of.txt57
-rw-r--r--arch/powerpc/kernel/cputable.c11
-rw-r--r--arch/powerpc/kernel/entry_32.S6
-rw-r--r--arch/powerpc/kernel/iommu.c28
-rw-r--r--arch/powerpc/kernel/lparcfg.c386
-rw-r--r--arch/powerpc/kernel/process.c46
-rw-r--r--arch/powerpc/kernel/prom_init.c9
-rw-r--r--arch/powerpc/kernel/ptrace.c72
-rw-r--r--arch/powerpc/kernel/signal.c6
-rw-r--r--arch/powerpc/kernel/sysfs.c3
-rw-r--r--arch/powerpc/kernel/traps.c16
-rw-r--r--arch/powerpc/kernel/vio.c1033
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/mm/fault.c25
-rw-r--r--arch/powerpc/platforms/52xx/Kconfig2
-rw-r--r--arch/powerpc/platforms/cell/iommu.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c35
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.c3
-rw-r--r--arch/powerpc/platforms/iseries/iommu.c3
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c3
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig23
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c468
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c42
-rw-r--r--arch/powerpc/platforms/pseries/plpar_wrappers.h10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c71
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c3
-rw-r--r--drivers/net/ibmveth.c189
-rw-r--r--drivers/net/ibmveth.h5
-rw-r--r--drivers/of/of_i2c.c2
-rw-r--r--drivers/scsi/ibmvscsi/ibmvfc.c15
-rw-r--r--drivers/scsi/ibmvscsi/ibmvscsi.c45
-rw-r--r--drivers/scsi/ibmvscsi/ibmvscsi.h2
-rw-r--r--fs/binfmt_elf.c28
-rw-r--r--include/asm-powerpc/cputable.h2
-rw-r--r--include/asm-powerpc/elf.h8
-rw-r--r--include/asm-powerpc/firmware.h3
-rw-r--r--include/asm-powerpc/hvcall.h23
-rw-r--r--include/asm-powerpc/lppaca.h5
-rw-r--r--include/asm-powerpc/machdep.h2
-rw-r--r--include/asm-powerpc/mpc52xx_psc.h40
-rw-r--r--include/asm-powerpc/pgtable.h13
-rw-r--r--include/asm-powerpc/syscalls.h1
-rw-r--r--include/asm-powerpc/systbl.h6
-rw-r--r--include/asm-powerpc/system.h2
-rw-r--r--include/asm-powerpc/tlbflush.h11
-rw-r--r--include/asm-powerpc/unistd.h8
-rw-r--r--include/asm-powerpc/vio.h27
-rw-r--r--include/linux/auxvec.h6
49 files changed, 2546 insertions, 277 deletions
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index ea1b70b35793..99514ced82c5 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -59,6 +59,7 @@ Table of Contents
59 p) Freescale Synchronous Serial Interface 59 p) Freescale Synchronous Serial Interface
60 q) USB EHCI controllers 60 q) USB EHCI controllers
61 r) MDIO on GPIOs 61 r) MDIO on GPIOs
62 s) SPI busses
62 63
63 VII - Marvell Discovery mv64[345]6x System Controller chips 64 VII - Marvell Discovery mv64[345]6x System Controller chips
64 1) The /system-controller node 65 1) The /system-controller node
@@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model.
1883 &qe_pio_c 6>; 1884 &qe_pio_c 6>;
1884 }; 1885 };
1885 1886
1887 s) SPI (Serial Peripheral Interface) busses
1888
1889 SPI busses can be described with a node for the SPI master device
1890 and a set of child nodes for each SPI slave on the bus. For this
1891 discussion, it is assumed that the system's SPI controller is in
1892 SPI master mode. This binding does not describe SPI controllers
1893 in slave mode.
1894
1895 The SPI master node requires the following properties:
1896 - #address-cells - number of cells required to define a chip select
1897 address on the SPI bus.
1898 - #size-cells - should be zero.
1899 - compatible - name of SPI bus controller following generic names
1900 recommended practice.
1901 No other properties are required in the SPI bus node. It is assumed
1902 that a driver for an SPI bus device will understand that it is an SPI bus.
1903 However, the binding does not attempt to define the specific method for
1904 assigning chip select numbers. Since SPI chip select configuration is
1905 flexible and non-standardized, it is left out of this binding with the
1906 assumption that board specific platform code will be used to manage
1907 chip selects. Individual drivers can define additional properties to
1908 support describing the chip select layout.
1909
1910 SPI slave nodes must be children of the SPI master node and can
1911 contain the following properties.
1912 - reg - (required) chip select address of device.
1913 - compatible - (required) name of SPI device following generic names
1914 recommended practice
1915 - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
1916 - spi-cpol - (optional) Empty property indicating device requires
1917 inverse clock polarity (CPOL) mode
1918 - spi-cpha - (optional) Empty property indicating device requires
1919 shifted clock phase (CPHA) mode
1920
1921 SPI example for an MPC5200 SPI bus:
1922 spi@f00 {
1923 #address-cells = <1>;
1924 #size-cells = <0>;
1925 compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
1926 reg = <0xf00 0x20>;
1927 interrupts = <2 13 0 2 14 0>;
1928 interrupt-parent = <&mpc5200_pic>;
1929
1930 ethernet-switch@0 {
1931 compatible = "micrel,ks8995m";
1932 spi-max-frequency = <1000000>;
1933 reg = <0>;
1934 };
1935
1936 codec@1 {
1937 compatible = "ti,tlv320aic26";
1938 spi-max-frequency = <100000>;
1939 reg = <1>;
1940 };
1941 };
1942
1886VII - Marvell Discovery mv64[345]6x System Controller chips 1943VII - Marvell Discovery mv64[345]6x System Controller chips
1887=========================================================== 1944===========================================================
1888 1945
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b936a1dd0a50..25a052c16754 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
23struct cpu_spec* cur_cpu_spec = NULL; 23struct cpu_spec* cur_cpu_spec = NULL;
24EXPORT_SYMBOL(cur_cpu_spec); 24EXPORT_SYMBOL(cur_cpu_spec);
25 25
26/* The platform string corresponding to the real PVR */
27const char *powerpc_base_platform;
28
26/* NOTE: 29/* NOTE:
27 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's 30 * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
28 * the responsibility of the appropriate CPU save/restore functions to 31 * the responsibility of the appropriate CPU save/restore functions to
@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
1652 } else 1655 } else
1653 *t = *s; 1656 *t = *s;
1654 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; 1657 *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
1658
1659 /*
1660 * Set the base platform string once; assumes
1661 * we're called with real pvr first.
1662 */
1663 if (powerpc_base_platform == NULL)
1664 powerpc_base_platform = t->platform;
1665
1655#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) 1666#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
1656 /* ppc64 and booke expect identify_cpu to also call 1667 /* ppc64 and booke expect identify_cpu to also call
1657 * setup_cpu for that processor. I will consolidate 1668 * setup_cpu for that processor. I will consolidate
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index da52269aec1e..81c8324a4a3c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -148,7 +148,7 @@ transfer_to_handler:
148 /* Check to see if the dbcr0 register is set up to debug. Use the 148 /* Check to see if the dbcr0 register is set up to debug. Use the
149 internal debug mode bit to do this. */ 149 internal debug mode bit to do this. */
150 lwz r12,THREAD_DBCR0(r12) 150 lwz r12,THREAD_DBCR0(r12)
151 andis. r12,r12,DBCR0_IDM@h 151 andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
152 beq+ 3f 152 beq+ 3f
153 /* From user and task is ptraced - load up global dbcr0 */ 153 /* From user and task is ptraced - load up global dbcr0 */
154 li r12,-1 /* clear all pending debug events */ 154 li r12,-1 /* clear all pending debug events */
@@ -292,7 +292,7 @@ syscall_exit_cont:
292 /* If the process has its own DBCR0 value, load it up. The internal 292 /* If the process has its own DBCR0 value, load it up. The internal
293 debug mode bit tells us that dbcr0 should be loaded. */ 293 debug mode bit tells us that dbcr0 should be loaded. */
294 lwz r0,THREAD+THREAD_DBCR0(r2) 294 lwz r0,THREAD+THREAD_DBCR0(r2)
295 andis. r10,r0,DBCR0_IDM@h 295 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
296 bnel- load_dbcr0 296 bnel- load_dbcr0
297#endif 297#endif
298#ifdef CONFIG_44x 298#ifdef CONFIG_44x
@@ -720,7 +720,7 @@ restore_user:
720 /* Check whether this process has its own DBCR0 value. The internal 720 /* Check whether this process has its own DBCR0 value. The internal
721 debug mode bit tells us that dbcr0 should be loaded. */ 721 debug mode bit tells us that dbcr0 should be loaded. */
722 lwz r0,THREAD+THREAD_DBCR0(r2) 722 lwz r0,THREAD+THREAD_DBCR0(r2)
723 andis. r10,r0,DBCR0_IDM@h 723 andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
724 bnel- load_dbcr0 724 bnel- load_dbcr0
725#endif 725#endif
726 726
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2385f68c1751..550a19399bfa 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
49 49
50static int protect4gb = 1; 50static int protect4gb = 1;
51 51
52static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
53
52static inline unsigned long iommu_num_pages(unsigned long vaddr, 54static inline unsigned long iommu_num_pages(unsigned long vaddr,
53 unsigned long slen) 55 unsigned long slen)
54{ 56{
@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
191{ 193{
192 unsigned long entry, flags; 194 unsigned long entry, flags;
193 dma_addr_t ret = DMA_ERROR_CODE; 195 dma_addr_t ret = DMA_ERROR_CODE;
196 int build_fail;
194 197
195 spin_lock_irqsave(&(tbl->it_lock), flags); 198 spin_lock_irqsave(&(tbl->it_lock), flags);
196 199
@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
205 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ 208 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
206 209
207 /* Put the TCEs in the HW table */ 210 /* Put the TCEs in the HW table */
208 ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, 211 build_fail = ppc_md.tce_build(tbl, entry, npages,
209 direction, attrs); 212 (unsigned long)page & IOMMU_PAGE_MASK,
213 direction, attrs);
214
215 /* ppc_md.tce_build() only returns non-zero for transient errors.
216 * Clean up the table bitmap in this case and return
217 * DMA_ERROR_CODE. For all other errors the functionality is
218 * not altered.
219 */
220 if (unlikely(build_fail)) {
221 __iommu_free(tbl, ret, npages);
210 222
223 spin_unlock_irqrestore(&(tbl->it_lock), flags);
224 return DMA_ERROR_CODE;
225 }
211 226
212 /* Flush/invalidate TLB caches if necessary */ 227 /* Flush/invalidate TLB caches if necessary */
213 if (ppc_md.tce_flush) 228 if (ppc_md.tce_flush)
@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
276 dma_addr_t dma_next = 0, dma_addr; 291 dma_addr_t dma_next = 0, dma_addr;
277 unsigned long flags; 292 unsigned long flags;
278 struct scatterlist *s, *outs, *segstart; 293 struct scatterlist *s, *outs, *segstart;
279 int outcount, incount, i; 294 int outcount, incount, i, build_fail = 0;
280 unsigned int align; 295 unsigned int align;
281 unsigned long handle; 296 unsigned long handle;
282 unsigned int max_seg_size; 297 unsigned int max_seg_size;
@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
337 npages, entry, dma_addr); 352 npages, entry, dma_addr);
338 353
339 /* Insert into HW table */ 354 /* Insert into HW table */
340 ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, 355 build_fail = ppc_md.tce_build(tbl, entry, npages,
341 direction, attrs); 356 vaddr & IOMMU_PAGE_MASK,
357 direction, attrs);
358 if(unlikely(build_fail))
359 goto failure;
342 360
343 /* If we are in an open segment, try merging */ 361 /* If we are in an open segment, try merging */
344 if (segstart != s) { 362 if (segstart != s) {
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 827a5726a035..9f856a0c3e38 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,8 +34,9 @@
34#include <asm/time.h> 34#include <asm/time.h>
35#include <asm/prom.h> 35#include <asm/prom.h>
36#include <asm/vdso_datapage.h> 36#include <asm/vdso_datapage.h>
37#include <asm/vio.h>
37 38
38#define MODULE_VERS "1.7" 39#define MODULE_VERS "1.8"
39#define MODULE_NAME "lparcfg" 40#define MODULE_NAME "lparcfg"
40 41
41/* #define LPARCFG_DEBUG */ 42/* #define LPARCFG_DEBUG */
@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
129/* 130/*
130 * Methods used to fetch LPAR data when running on a pSeries platform. 131 * Methods used to fetch LPAR data when running on a pSeries platform.
131 */ 132 */
132static void log_plpar_hcall_return(unsigned long rc, char *tag) 133/**
134 * h_get_mpp
135 * H_GET_MPP hcall returns info in 7 parms
136 */
137int h_get_mpp(struct hvcall_mpp_data *mpp_data)
133{ 138{
134 switch(rc) { 139 int rc;
135 case 0: 140 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
136 return; 141
137 case H_HARDWARE: 142 rc = plpar_hcall9(H_GET_MPP, retbuf);
138 printk(KERN_INFO "plpar-hcall (%s) " 143
139 "Hardware fault\n", tag); 144 mpp_data->entitled_mem = retbuf[0];
140 return; 145 mpp_data->mapped_mem = retbuf[1];
141 case H_FUNCTION: 146
142 printk(KERN_INFO "plpar-hcall (%s) " 147 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
143 "Function not allowed\n", tag); 148 mpp_data->pool_num = retbuf[2] & 0xffff;
144 return; 149
145 case H_AUTHORITY: 150 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
146 printk(KERN_INFO "plpar-hcall (%s) " 151 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
147 "Not authorized to this function\n", tag); 152 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
148 return; 153
149 case H_PARAMETER: 154 mpp_data->pool_size = retbuf[4];
150 printk(KERN_INFO "plpar-hcall (%s) " 155 mpp_data->loan_request = retbuf[5];
151 "Bad parameter(s)\n",tag); 156 mpp_data->backing_mem = retbuf[6];
152 return; 157
153 default: 158 return rc;
154 printk(KERN_INFO "plpar-hcall (%s) "
155 "Unexpected rc(0x%lx)\n", tag, rc);
156 }
157} 159}
160EXPORT_SYMBOL(h_get_mpp);
161
162struct hvcall_ppp_data {
163 u64 entitlement;
164 u64 unallocated_entitlement;
165 u16 group_num;
166 u16 pool_num;
167 u8 capped;
168 u8 weight;
169 u8 unallocated_weight;
170 u16 active_procs_in_pool;
171 u16 active_system_procs;
172};
158 173
159/* 174/*
160 * H_GET_PPP hcall returns info in 4 parms. 175 * H_GET_PPP hcall returns info in 4 parms.
@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
176 * XXXX - Active processors in Physical Processor Pool. 191 * XXXX - Active processors in Physical Processor Pool.
177 * XXXX - Processors active on platform. 192 * XXXX - Processors active on platform.
178 */ 193 */
179static unsigned int h_get_ppp(unsigned long *entitled, 194static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
180 unsigned long *unallocated,
181 unsigned long *aggregation,
182 unsigned long *resource)
183{ 195{
184 unsigned long rc; 196 unsigned long rc;
185 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 197 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
186 198
187 rc = plpar_hcall(H_GET_PPP, retbuf); 199 rc = plpar_hcall(H_GET_PPP, retbuf);
188 200
189 *entitled = retbuf[0]; 201 ppp_data->entitlement = retbuf[0];
190 *unallocated = retbuf[1]; 202 ppp_data->unallocated_entitlement = retbuf[1];
191 *aggregation = retbuf[2]; 203
192 *resource = retbuf[3]; 204 ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
205 ppp_data->pool_num = retbuf[2] & 0xffff;
193 206
194 log_plpar_hcall_return(rc, "H_GET_PPP"); 207 ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
208 ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
209 ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
210 ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
211 ppp_data->active_system_procs = retbuf[3] & 0xffff;
195 212
196 return rc; 213 return rc;
197} 214}
198 215
199static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) 216static unsigned h_pic(unsigned long *pool_idle_time,
217 unsigned long *num_procs)
200{ 218{
201 unsigned long rc; 219 unsigned long rc;
202 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 220 unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
206 *pool_idle_time = retbuf[0]; 224 *pool_idle_time = retbuf[0];
207 *num_procs = retbuf[1]; 225 *num_procs = retbuf[1];
208 226
209 if (rc != H_AUTHORITY) 227 return rc;
210 log_plpar_hcall_return(rc, "H_PIC"); 228}
229
230/*
231 * parse_ppp_data
232 * Parse out the data returned from h_get_ppp and h_pic
233 */
234static void parse_ppp_data(struct seq_file *m)
235{
236 struct hvcall_ppp_data ppp_data;
237 int rc;
238
239 rc = h_get_ppp(&ppp_data);
240 if (rc)
241 return;
242
243 seq_printf(m, "partition_entitled_capacity=%ld\n",
244 ppp_data.entitlement);
245 seq_printf(m, "group=%d\n", ppp_data.group_num);
246 seq_printf(m, "system_active_processors=%d\n",
247 ppp_data.active_system_procs);
248
249 /* pool related entries are apropriate for shared configs */
250 if (lppaca[0].shared_proc) {
251 unsigned long pool_idle_time, pool_procs;
252
253 seq_printf(m, "pool=%d\n", ppp_data.pool_num);
254
255 /* report pool_capacity in percentage */
256 seq_printf(m, "pool_capacity=%d\n",
257 ppp_data.active_procs_in_pool * 100);
258
259 h_pic(&pool_idle_time, &pool_procs);
260 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
261 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
262 }
263
264 seq_printf(m, "unallocated_capacity_weight=%d\n",
265 ppp_data.unallocated_weight);
266 seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
267 seq_printf(m, "capped=%d\n", ppp_data.capped);
268 seq_printf(m, "unallocated_capacity=%ld\n",
269 ppp_data.unallocated_entitlement);
270}
271
272/**
273 * parse_mpp_data
274 * Parse out data returned from h_get_mpp
275 */
276static void parse_mpp_data(struct seq_file *m)
277{
278 struct hvcall_mpp_data mpp_data;
279 int rc;
280
281 rc = h_get_mpp(&mpp_data);
282 if (rc)
283 return;
284
285 seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
286
287 if (mpp_data.mapped_mem != -1)
288 seq_printf(m, "mapped_entitled_memory=%ld\n",
289 mpp_data.mapped_mem);
290
291 seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
292 seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
293
294 seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
295 seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
296 mpp_data.unallocated_mem_weight);
297 seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
298 mpp_data.unallocated_entitlement);
299
300 if (mpp_data.pool_size != -1)
301 seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
302 mpp_data.pool_size);
303
304 seq_printf(m, "entitled_memory_loan_request=%ld\n",
305 mpp_data.loan_request);
306
307 seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
211} 308}
212 309
213#define SPLPAR_CHARACTERISTICS_TOKEN 20 310#define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
313 return count; 410 return count;
314} 411}
315 412
413static void pseries_cmo_data(struct seq_file *m)
414{
415 int cpu;
416 unsigned long cmo_faults = 0;
417 unsigned long cmo_fault_time = 0;
418
419 if (!firmware_has_feature(FW_FEATURE_CMO))
420 return;
421
422 for_each_possible_cpu(cpu) {
423 cmo_faults += lppaca[cpu].cmo_faults;
424 cmo_fault_time += lppaca[cpu].cmo_fault_time;
425 }
426
427 seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
428 seq_printf(m, "cmo_fault_time_usec=%lu\n",
429 cmo_fault_time / tb_ticks_per_usec);
430}
431
316static int pseries_lparcfg_data(struct seq_file *m, void *v) 432static int pseries_lparcfg_data(struct seq_file *m, void *v)
317{ 433{
318 int partition_potential_processors; 434 int partition_potential_processors;
@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
334 partition_active_processors = lparcfg_count_active_processors(); 450 partition_active_processors = lparcfg_count_active_processors();
335 451
336 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 452 if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
337 unsigned long h_entitled, h_unallocated;
338 unsigned long h_aggregation, h_resource;
339 unsigned long pool_idle_time, pool_procs;
340 unsigned long purr;
341
342 h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
343 &h_resource);
344
345 seq_printf(m, "R4=0x%lx\n", h_entitled);
346 seq_printf(m, "R5=0x%lx\n", h_unallocated);
347 seq_printf(m, "R6=0x%lx\n", h_aggregation);
348 seq_printf(m, "R7=0x%lx\n", h_resource);
349
350 purr = get_purr();
351
352 /* this call handles the ibm,get-system-parameter contents */ 453 /* this call handles the ibm,get-system-parameter contents */
353 parse_system_parameter_string(m); 454 parse_system_parameter_string(m);
455 parse_ppp_data(m);
456 parse_mpp_data(m);
457 pseries_cmo_data(m);
354 458
355 seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); 459 seq_printf(m, "purr=%ld\n", get_purr());
356
357 seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
358
359 seq_printf(m, "system_active_processors=%ld\n",
360 (h_resource >> 0 * 8) & 0xffff);
361
362 /* pool related entries are apropriate for shared configs */
363 if (lppaca[0].shared_proc) {
364
365 h_pic(&pool_idle_time, &pool_procs);
366
367 seq_printf(m, "pool=%ld\n",
368 (h_aggregation >> 0 * 8) & 0xffff);
369
370 /* report pool_capacity in percentage */
371 seq_printf(m, "pool_capacity=%ld\n",
372 ((h_resource >> 2 * 8) & 0xffff) * 100);
373
374 seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
375
376 seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
377 }
378
379 seq_printf(m, "unallocated_capacity_weight=%ld\n",
380 (h_resource >> 4 * 8) & 0xFF);
381
382 seq_printf(m, "capacity_weight=%ld\n",
383 (h_resource >> 5 * 8) & 0xFF);
384
385 seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
386
387 seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
388
389 seq_printf(m, "purr=%ld\n", purr);
390
391 } else { /* non SPLPAR case */ 460 } else { /* non SPLPAR case */
392 461
393 seq_printf(m, "system_active_processors=%d\n", 462 seq_printf(m, "system_active_processors=%d\n",
@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
414 return 0; 483 return 0;
415} 484}
416 485
486static ssize_t update_ppp(u64 *entitlement, u8 *weight)
487{
488 struct hvcall_ppp_data ppp_data;
489 u8 new_weight;
490 u64 new_entitled;
491 ssize_t retval;
492
493 /* Get our current parameters */
494 retval = h_get_ppp(&ppp_data);
495 if (retval)
496 return retval;
497
498 if (entitlement) {
499 new_weight = ppp_data.weight;
500 new_entitled = *entitlement;
501 } else if (weight) {
502 new_weight = *weight;
503 new_entitled = ppp_data.entitlement;
504 } else
505 return -EINVAL;
506
507 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
508 __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
509
510 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
511 __FUNCTION__, new_entitled, new_weight);
512
513 retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
514 return retval;
515}
516
517/**
518 * update_mpp
519 *
520 * Update the memory entitlement and weight for the partition. Caller must
521 * specify either a new entitlement or weight, not both, to be updated
522 * since the h_set_mpp call takes both entitlement and weight as parameters.
523 */
524static ssize_t update_mpp(u64 *entitlement, u8 *weight)
525{
526 struct hvcall_mpp_data mpp_data;
527 u64 new_entitled;
528 u8 new_weight;
529 ssize_t rc;
530
531 if (entitlement) {
532 /* Check with vio to ensure the new memory entitlement
533 * can be handled.
534 */
535 rc = vio_cmo_entitlement_update(*entitlement);
536 if (rc)
537 return rc;
538 }
539
540 rc = h_get_mpp(&mpp_data);
541 if (rc)
542 return rc;
543
544 if (entitlement) {
545 new_weight = mpp_data.mem_weight;
546 new_entitled = *entitlement;
547 } else if (weight) {
548 new_weight = *weight;
549 new_entitled = mpp_data.entitled_mem;
550 } else
551 return -EINVAL;
552
553 pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
554 __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
555
556 pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
557 __FUNCTION__, new_entitled, new_weight);
558
559 rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
560 return rc;
561}
562
417/* 563/*
418 * Interface for changing system parameters (variable capacity weight 564 * Interface for changing system parameters (variable capacity weight
419 * and entitled capacity). Format of input is "param_name=value"; 565 * and entitled capacity). Format of input is "param_name=value";
@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
427static ssize_t lparcfg_write(struct file *file, const char __user * buf, 573static ssize_t lparcfg_write(struct file *file, const char __user * buf,
428 size_t count, loff_t * off) 574 size_t count, loff_t * off)
429{ 575{
430 char *kbuf; 576 int kbuf_sz = 64;
577 char kbuf[kbuf_sz];
431 char *tmp; 578 char *tmp;
432 u64 new_entitled, *new_entitled_ptr = &new_entitled; 579 u64 new_entitled, *new_entitled_ptr = &new_entitled;
433 u8 new_weight, *new_weight_ptr = &new_weight; 580 u8 new_weight, *new_weight_ptr = &new_weight;
434 581 ssize_t retval;
435 unsigned long current_entitled; /* parameters for h_get_ppp */
436 unsigned long dummy;
437 unsigned long resource;
438 u8 current_weight;
439
440 ssize_t retval = -ENOMEM;
441 582
442 if (!firmware_has_feature(FW_FEATURE_SPLPAR) || 583 if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
443 firmware_has_feature(FW_FEATURE_ISERIES)) 584 firmware_has_feature(FW_FEATURE_ISERIES))
444 return -EINVAL; 585 return -EINVAL;
445 586
446 kbuf = kmalloc(count, GFP_KERNEL); 587 if (count > kbuf_sz)
447 if (!kbuf) 588 return -EINVAL;
448 goto out;
449 589
450 retval = -EFAULT;
451 if (copy_from_user(kbuf, buf, count)) 590 if (copy_from_user(kbuf, buf, count))
452 goto out; 591 return -EFAULT;
453 592
454 retval = -EINVAL;
455 kbuf[count - 1] = '\0'; 593 kbuf[count - 1] = '\0';
456 tmp = strchr(kbuf, '='); 594 tmp = strchr(kbuf, '=');
457 if (!tmp) 595 if (!tmp)
458 goto out; 596 return -EINVAL;
459 597
460 *tmp++ = '\0'; 598 *tmp++ = '\0';
461 599
@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
463 char *endp; 601 char *endp;
464 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); 602 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
465 if (endp == tmp) 603 if (endp == tmp)
466 goto out; 604 return -EINVAL;
467 new_weight_ptr = &current_weight; 605
606 retval = update_ppp(new_entitled_ptr, NULL);
468 } else if (!strcmp(kbuf, "capacity_weight")) { 607 } else if (!strcmp(kbuf, "capacity_weight")) {
469 char *endp; 608 char *endp;
470 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); 609 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
471 if (endp == tmp) 610 if (endp == tmp)
472 goto out; 611 return -EINVAL;
473 new_entitled_ptr = &current_entitled;
474 } else
475 goto out;
476
477 /* Get our current parameters */
478 retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
479 if (retval) {
480 retval = -EIO;
481 goto out;
482 }
483
484 current_weight = (resource >> 5 * 8) & 0xFF;
485 612
486 pr_debug("%s: current_entitled = %lu, current_weight = %u\n", 613 retval = update_ppp(NULL, new_weight_ptr);
487 __func__, current_entitled, current_weight); 614 } else if (!strcmp(kbuf, "entitled_memory")) {
615 char *endp;
616 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
617 if (endp == tmp)
618 return -EINVAL;
488 619
489 pr_debug("%s: new_entitled = %lu, new_weight = %u\n", 620 retval = update_mpp(new_entitled_ptr, NULL);
490 __func__, *new_entitled_ptr, *new_weight_ptr); 621 } else if (!strcmp(kbuf, "entitled_memory_weight")) {
622 char *endp;
623 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
624 if (endp == tmp)
625 return -EINVAL;
491 626
492 retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, 627 retval = update_mpp(NULL, new_weight_ptr);
493 *new_weight_ptr); 628 } else
629 return -EINVAL;
494 630
495 if (retval == H_SUCCESS || retval == H_CONSTRAINED) { 631 if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
496 retval = count; 632 retval = count;
@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
506 retval = -EIO; 642 retval = -EIO;
507 } 643 }
508 644
509out:
510 kfree(kbuf);
511 return retval; 645 return retval;
512} 646}
513 647
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 219f3634115e..db2497ccc111 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,6 +47,8 @@
47#ifdef CONFIG_PPC64 47#ifdef CONFIG_PPC64
48#include <asm/firmware.h> 48#include <asm/firmware.h>
49#endif 49#endif
50#include <linux/kprobes.h>
51#include <linux/kdebug.h>
50 52
51extern unsigned long _get_SP(void); 53extern unsigned long _get_SP(void);
52 54
@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
239} 241}
240#endif /* CONFIG_SMP */ 242#endif /* CONFIG_SMP */
241 243
244void do_dabr(struct pt_regs *regs, unsigned long address,
245 unsigned long error_code)
246{
247 siginfo_t info;
248
249 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
250 11, SIGSEGV) == NOTIFY_STOP)
251 return;
252
253 if (debugger_dabr_match(regs))
254 return;
255
256 /* Clear the DAC and struct entries. One shot trigger */
257#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
258 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
259 | DBCR0_IDM));
260#endif
261
262 /* Clear the DABR */
263 set_dabr(0);
264
265 /* Deliver the signal to userspace */
266 info.si_signo = SIGTRAP;
267 info.si_errno = 0;
268 info.si_code = TRAP_HWBKPT;
269 info.si_addr = (void __user *)address;
270 force_sig_info(SIGTRAP, &info, current);
271}
272
242static DEFINE_PER_CPU(unsigned long, current_dabr); 273static DEFINE_PER_CPU(unsigned long, current_dabr);
243 274
244int set_dabr(unsigned long dabr) 275int set_dabr(unsigned long dabr)
@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
254#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) 285#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
255 mtspr(SPRN_DABR, dabr); 286 mtspr(SPRN_DABR, dabr);
256#endif 287#endif
288
289#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
290 mtspr(SPRN_DAC1, dabr);
291#endif
292
257 return 0; 293 return 0;
258} 294}
259 295
@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
337 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) 373 if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
338 set_dabr(new->thread.dabr); 374 set_dabr(new->thread.dabr);
339 375
376#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
377 /* If new thread DAC (HW breakpoint) is the same then leave it */
378 if (new->thread.dabr)
379 set_dabr(new->thread.dabr);
380#endif
381
340 new_thread = &new->thread; 382 new_thread = &new->thread;
341 old_thread = &current->thread; 383 old_thread = &current->thread;
342 384
@@ -525,6 +567,10 @@ void flush_thread(void)
525 if (current->thread.dabr) { 567 if (current->thread.dabr) {
526 current->thread.dabr = 0; 568 current->thread.dabr = 0;
527 set_dabr(0); 569 set_dabr(0);
570
571#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
572 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
573#endif
528 } 574 }
529} 575}
530 576
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1ea8c8d3ce89..c4ab2195b9cb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
643#else 643#else
644#define OV5_MSI 0x00 644#define OV5_MSI 0x00
645#endif /* CONFIG_PCI_MSI */ 645#endif /* CONFIG_PCI_MSI */
646#ifdef CONFIG_PPC_SMLPAR
647#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
648#else
649#define OV5_CMO 0x00
650#endif
646 651
647/* 652/*
648 * The architecture vector has an array of PVR mask/value pairs, 653 * The architecture vector has an array of PVR mask/value pairs,
@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
687 0, /* don't halt */ 692 0, /* don't halt */
688 693
689 /* option vector 5: PAPR/OF options */ 694 /* option vector 5: PAPR/OF options */
690 3 - 2, /* length */ 695 5 - 2, /* length */
691 0, /* don't ignore, don't halt */ 696 0, /* don't ignore, don't halt */
692 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | 697 OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
693 OV5_DONATE_DEDICATE_CPU | OV5_MSI, 698 OV5_DONATE_DEDICATE_CPU | OV5_MSI,
699 0,
700 OV5_CMO,
694}; 701};
695 702
696/* Old method - ELF header with PT_NOTE sections */ 703/* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8feb93e7890c..a5d0e78779c8 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
703 703
704 if (regs != NULL) { 704 if (regs != NULL) {
705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 705#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
706 task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; 706 task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
707 regs->msr |= MSR_DE; 707 regs->msr |= MSR_DE;
708#else 708#else
709 regs->msr |= MSR_SE; 709 regs->msr |= MSR_SE;
@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
716{ 716{
717 struct pt_regs *regs = task->thread.regs; 717 struct pt_regs *regs = task->thread.regs;
718 718
719
720#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
721 /* If DAC then do not single step, skip */
722 if (task->thread.dabr)
723 return;
724#endif
725
719 if (regs != NULL) { 726 if (regs != NULL) {
720#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 727#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
721 task->thread.dbcr0 = 0; 728 task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
722 regs->msr &= ~MSR_DE; 729 regs->msr &= ~MSR_DE;
723#else 730#else
724 regs->msr &= ~MSR_SE; 731 regs->msr &= ~MSR_SE;
@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
727 clear_tsk_thread_flag(task, TIF_SINGLESTEP); 734 clear_tsk_thread_flag(task, TIF_SINGLESTEP);
728} 735}
729 736
730static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, 737int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
731 unsigned long data) 738 unsigned long data)
732{ 739{
733 /* We only support one DABR and no IABRS at the moment */ 740 /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
741 * For embedded processors we support one DAC and no IAC's at the
742 * moment.
743 */
734 if (addr > 0) 744 if (addr > 0)
735 return -EINVAL; 745 return -EINVAL;
736 746
737 /* The bottom 3 bits are flags */
738 if ((data & ~0x7UL) >= TASK_SIZE) 747 if ((data & ~0x7UL) >= TASK_SIZE)
739 return -EIO; 748 return -EIO;
740 749
741 /* Ensure translation is on */ 750#ifdef CONFIG_PPC64
751
752 /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
753 * It was assumed, on previous implementations, that 3 bits were
754 * passed together with the data address, fitting the design of the
755 * DABR register, as follows:
756 *
757 * bit 0: Read flag
758 * bit 1: Write flag
759 * bit 2: Breakpoint translation
760 *
761 * Thus, we use them here as so.
762 */
763
764 /* Ensure breakpoint translation bit is set */
742 if (data && !(data & DABR_TRANSLATION)) 765 if (data && !(data & DABR_TRANSLATION))
743 return -EIO; 766 return -EIO;
744 767
768 /* Move contents to the DABR register */
745 task->thread.dabr = data; 769 task->thread.dabr = data;
770
771#endif
772#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
773
774 /* As described above, it was assumed 3 bits were passed with the data
775 * address, but we will assume only the mode bits will be passed
776 * as to not cause alignment restrictions for DAC-based processors.
777 */
778
779 /* DAC's hold the whole address without any mode flags */
780 task->thread.dabr = data & ~0x3UL;
781
782 if (task->thread.dabr == 0) {
783 task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
784 task->thread.regs->msr &= ~MSR_DE;
785 return 0;
786 }
787
788 /* Read or Write bits must be set */
789
790 if (!(data & 0x3UL))
791 return -EINVAL;
792
793 /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
794 register */
795 task->thread.dbcr0 = DBCR0_IDM;
796
797 /* Check for write and read flags and set DBCR0
798 accordingly */
799 if (data & 0x1UL)
800 task->thread.dbcr0 |= DBSR_DAC1R;
801 if (data & 0x2UL)
802 task->thread.dbcr0 |= DBSR_DAC1W;
803
804 task->thread.regs->msr |= MSR_DE;
805#endif
746 return 0; 806 return 0;
747} 807}
748 808
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index ad55488939c3..7aada783ec6a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
145 * user space. The DABR will have been cleared if it 145 * user space. The DABR will have been cleared if it
146 * triggered inside the kernel. 146 * triggered inside the kernel.
147 */ 147 */
148 if (current->thread.dabr) 148 if (current->thread.dabr) {
149 set_dabr(current->thread.dabr); 149 set_dabr(current->thread.dabr);
150#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
151 mtspr(SPRN_DBCR0, current->thread.dbcr0);
152#endif
153 }
150 154
151 if (is32) { 155 if (is32) {
152 if (ka.sa.sa_flags & SA_SIGINFO) 156 if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index aba0ba95f062..800e5e9a087b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -529,7 +529,8 @@ static void register_nodes(void)
529#endif 529#endif
530 530
531/* Only valid if CPU is present. */ 531/* Only valid if CPU is present. */
532static ssize_t show_physical_id(struct sys_device *dev, char *buf) 532static ssize_t show_physical_id(struct sys_device *dev,
533 struct sysdev_attribute *attr, char *buf)
533{ 534{
534 struct cpu *cpu = container_of(dev, struct cpu, sysdev); 535 struct cpu *cpu = container_of(dev, struct cpu, sysdev);
535 536
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 878fbddb6ae1..81ccb8dd1a54 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
1067 } 1067 }
1068 1068
1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); 1069 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1070 } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1071 regs->msr &= ~MSR_DE;
1072
1073 if (user_mode(regs)) {
1074 current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
1075 DBCR0_IDM);
1076 } else {
1077 /* Disable DAC interupts */
1078 mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
1079 DBSR_DAC1W | DBCR0_IDM));
1080
1081 /* Clear the DAC event */
1082 mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
1083 }
1084 /* Setup and send the trap to the handler */
1085 do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
1070 } 1086 }
1071} 1087}
1072#endif /* CONFIG_4xx || CONFIG_BOOKE */ 1088#endif /* CONFIG_4xx || CONFIG_BOOKE */
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b77f8af7ddde..ade8aeaa2e70 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
1/* 1/*
2 * IBM PowerPC Virtual I/O Infrastructure Support. 2 * IBM PowerPC Virtual I/O Infrastructure Support.
3 * 3 *
4 * Copyright (c) 2003-2005 IBM Corp. 4 * Copyright (c) 2003,2008 IBM Corp.
5 * Dave Engebretsen engebret@us.ibm.com 5 * Dave Engebretsen engebret@us.ibm.com
6 * Santiago Leon santil@us.ibm.com 6 * Santiago Leon santil@us.ibm.com
7 * Hollis Blanchard <hollisb@us.ibm.com> 7 * Hollis Blanchard <hollisb@us.ibm.com>
8 * Stephen Rothwell 8 * Stephen Rothwell
9 * Robert Jennings <rcjenn@us.ibm.com>
9 * 10 *
10 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */
46 .dev.bus = &vio_bus_type, 47 .dev.bus = &vio_bus_type,
47}; 48};
48 49
50#ifdef CONFIG_PPC_SMLPAR
51/**
52 * vio_cmo_pool - A pool of IO memory for CMO use
53 *
54 * @size: The size of the pool in bytes
55 * @free: The amount of free memory in the pool
56 */
57struct vio_cmo_pool {
58 size_t size;
59 size_t free;
60};
61
62/* How many ms to delay queued balance work */
63#define VIO_CMO_BALANCE_DELAY 100
64
65/* Portion out IO memory to CMO devices by this chunk size */
66#define VIO_CMO_BALANCE_CHUNK 131072
67
68/**
69 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
70 *
71 * @vio_dev: struct vio_dev pointer
72 * @list: pointer to other devices on bus that are being tracked
73 */
74struct vio_cmo_dev_entry {
75 struct vio_dev *viodev;
76 struct list_head list;
77};
78
79/**
80 * vio_cmo - VIO bus accounting structure for CMO entitlement
81 *
82 * @lock: spinlock for entire structure
83 * @balance_q: work queue for balancing system entitlement
84 * @device_list: list of CMO-enabled devices requiring entitlement
85 * @entitled: total system entitlement in bytes
86 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
87 * @excess: pool of excess entitlement not needed for device reserves or spare
88 * @spare: IO memory for device hotplug functionality
89 * @min: minimum necessary for system operation
90 * @desired: desired memory for system operation
91 * @curr: bytes currently allocated
92 * @high: high water mark for IO data usage
93 */
94struct vio_cmo {
95 spinlock_t lock;
96 struct delayed_work balance_q;
97 struct list_head device_list;
98 size_t entitled;
99 struct vio_cmo_pool reserve;
100 struct vio_cmo_pool excess;
101 size_t spare;
102 size_t min;
103 size_t desired;
104 size_t curr;
105 size_t high;
106} vio_cmo;
107
108/**
109 * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
110 */
111static int vio_cmo_num_OF_devs(void)
112{
113 struct device_node *node_vroot;
114 int count = 0;
115
116 /*
117 * Count the number of vdevice entries with an
118 * ibm,my-dma-window OF property
119 */
120 node_vroot = of_find_node_by_name(NULL, "vdevice");
121 if (node_vroot) {
122 struct device_node *of_node;
123 struct property *prop;
124
125 for_each_child_of_node(node_vroot, of_node) {
126 prop = of_find_property(of_node, "ibm,my-dma-window",
127 NULL);
128 if (prop)
129 count++;
130 }
131 }
132 of_node_put(node_vroot);
133 return count;
134}
135
136/**
137 * vio_cmo_alloc - allocate IO memory for CMO-enable devices
138 *
139 * @viodev: VIO device requesting IO memory
140 * @size: size of allocation requested
141 *
142 * Allocations come from memory reserved for the devices and any excess
143 * IO memory available to all devices. The spare pool used to service
144 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
145 * made available.
146 *
147 * Return codes:
148 * 0 for successful allocation and -ENOMEM for a failure
149 */
150static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
151{
152 unsigned long flags;
153 size_t reserve_free = 0;
154 size_t excess_free = 0;
155 int ret = -ENOMEM;
156
157 spin_lock_irqsave(&vio_cmo.lock, flags);
158
159 /* Determine the amount of free entitlement available in reserve */
160 if (viodev->cmo.entitled > viodev->cmo.allocated)
161 reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
162
163 /* If spare is not fulfilled, the excess pool can not be used. */
164 if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
165 excess_free = vio_cmo.excess.free;
166
167 /* The request can be satisfied */
168 if ((reserve_free + excess_free) >= size) {
169 vio_cmo.curr += size;
170 if (vio_cmo.curr > vio_cmo.high)
171 vio_cmo.high = vio_cmo.curr;
172 viodev->cmo.allocated += size;
173 size -= min(reserve_free, size);
174 vio_cmo.excess.free -= size;
175 ret = 0;
176 }
177
178 spin_unlock_irqrestore(&vio_cmo.lock, flags);
179 return ret;
180}
181
182/**
183 * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
184 * @viodev: VIO device freeing IO memory
185 * @size: size of deallocation
186 *
187 * IO memory is freed by the device back to the correct memory pools.
188 * The spare pool is replenished first from either memory pool, then
189 * the reserve pool is used to reduce device entitlement, the excess
190 * pool is used to increase the reserve pool toward the desired entitlement
191 * target, and then the remaining memory is returned to the pools.
192 *
193 */
194static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
195{
196 unsigned long flags;
197 size_t spare_needed = 0;
198 size_t excess_freed = 0;
199 size_t reserve_freed = size;
200 size_t tmp;
201 int balance = 0;
202
203 spin_lock_irqsave(&vio_cmo.lock, flags);
204 vio_cmo.curr -= size;
205
206 /* Amount of memory freed from the excess pool */
207 if (viodev->cmo.allocated > viodev->cmo.entitled) {
208 excess_freed = min(reserve_freed, (viodev->cmo.allocated -
209 viodev->cmo.entitled));
210 reserve_freed -= excess_freed;
211 }
212
213 /* Remove allocation from device */
214 viodev->cmo.allocated -= (reserve_freed + excess_freed);
215
216 /* Spare is a subset of the reserve pool, replenish it first. */
217 spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
218
219 /*
220 * Replenish the spare in the reserve pool from the excess pool.
221 * This moves entitlement into the reserve pool.
222 */
223 if (spare_needed && excess_freed) {
224 tmp = min(excess_freed, spare_needed);
225 vio_cmo.excess.size -= tmp;
226 vio_cmo.reserve.size += tmp;
227 vio_cmo.spare += tmp;
228 excess_freed -= tmp;
229 spare_needed -= tmp;
230 balance = 1;
231 }
232
233 /*
234 * Replenish the spare in the reserve pool from the reserve pool.
235 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
236 * if needed, and gives it to the spare pool. The amount of used
237 * memory in this pool does not change.
238 */
239 if (spare_needed && reserve_freed) {
240 tmp = min(spare_needed, min(reserve_freed,
241 (viodev->cmo.entitled -
242 VIO_CMO_MIN_ENT)));
243
244 vio_cmo.spare += tmp;
245 viodev->cmo.entitled -= tmp;
246 reserve_freed -= tmp;
247 spare_needed -= tmp;
248 balance = 1;
249 }
250
251 /*
252 * Increase the reserve pool until the desired allocation is met.
253 * Move an allocation freed from the excess pool into the reserve
254 * pool and schedule a balance operation.
255 */
256 if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
257 tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
258
259 vio_cmo.excess.size -= tmp;
260 vio_cmo.reserve.size += tmp;
261 excess_freed -= tmp;
262 balance = 1;
263 }
264
265 /* Return memory from the excess pool to that pool */
266 if (excess_freed)
267 vio_cmo.excess.free += excess_freed;
268
269 if (balance)
270 schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
271 spin_unlock_irqrestore(&vio_cmo.lock, flags);
272}
273
274/**
275 * vio_cmo_entitlement_update - Manage system entitlement changes
276 *
277 * @new_entitlement: new system entitlement to attempt to accommodate
278 *
279 * Increases in entitlement will be used to fulfill the spare entitlement
280 * and the rest is given to the excess pool. Decreases, if they are
281 * possible, come from the excess pool and from unused device entitlement
282 *
283 * Returns: 0 on success, -ENOMEM when change can not be made
284 */
285int vio_cmo_entitlement_update(size_t new_entitlement)
286{
287 struct vio_dev *viodev;
288 struct vio_cmo_dev_entry *dev_ent;
289 unsigned long flags;
290 size_t avail, delta, tmp;
291
292 spin_lock_irqsave(&vio_cmo.lock, flags);
293
294 /* Entitlement increases */
295 if (new_entitlement > vio_cmo.entitled) {
296 delta = new_entitlement - vio_cmo.entitled;
297
298 /* Fulfill spare allocation */
299 if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
300 tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
301 vio_cmo.spare += tmp;
302 vio_cmo.reserve.size += tmp;
303 delta -= tmp;
304 }
305
306 /* Remaining new allocation goes to the excess pool */
307 vio_cmo.entitled += delta;
308 vio_cmo.excess.size += delta;
309 vio_cmo.excess.free += delta;
310
311 goto out;
312 }
313
314 /* Entitlement decreases */
315 delta = vio_cmo.entitled - new_entitlement;
316 avail = vio_cmo.excess.free;
317
318 /*
319 * Need to check how much unused entitlement each device can
320 * sacrifice to fulfill entitlement change.
321 */
322 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
323 if (avail >= delta)
324 break;
325
326 viodev = dev_ent->viodev;
327 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
328 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
329 avail += viodev->cmo.entitled -
330 max_t(size_t, viodev->cmo.allocated,
331 VIO_CMO_MIN_ENT);
332 }
333
334 if (delta <= avail) {
335 vio_cmo.entitled -= delta;
336
337 /* Take entitlement from the excess pool first */
338 tmp = min(vio_cmo.excess.free, delta);
339 vio_cmo.excess.size -= tmp;
340 vio_cmo.excess.free -= tmp;
341 delta -= tmp;
342
343 /*
344 * Remove all but VIO_CMO_MIN_ENT bytes from devices
345 * until entitlement change is served
346 */
347 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
348 if (!delta)
349 break;
350
351 viodev = dev_ent->viodev;
352 tmp = 0;
353 if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
354 (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
355 tmp = viodev->cmo.entitled -
356 max_t(size_t, viodev->cmo.allocated,
357 VIO_CMO_MIN_ENT);
358 viodev->cmo.entitled -= min(tmp, delta);
359 delta -= min(tmp, delta);
360 }
361 } else {
362 spin_unlock_irqrestore(&vio_cmo.lock, flags);
363 return -ENOMEM;
364 }
365
366out:
367 schedule_delayed_work(&vio_cmo.balance_q, 0);
368 spin_unlock_irqrestore(&vio_cmo.lock, flags);
369 return 0;
370}
371
372/**
373 * vio_cmo_balance - Balance entitlement among devices
374 *
375 * @work: work queue structure for this operation
376 *
377 * Any system entitlement above the minimum needed for devices, or
378 * already allocated to devices, can be distributed to the devices.
379 * The list of devices is iterated through to recalculate the desired
380 * entitlement level and to determine how much entitlement above the
381 * minimum entitlement is allocated to devices.
382 *
383 * Small chunks of the available entitlement are given to devices until
384 * their requirements are fulfilled or there is no entitlement left to give.
385 * Upon completion sizes of the reserve and excess pools are calculated.
386 *
387 * The system minimum entitlement level is also recalculated here.
388 * Entitlement will be reserved for devices even after vio_bus_remove to
389 * accommodate reloading the driver. The OF tree is walked to count the
390 * number of devices present and this will remove entitlement for devices
391 * that have actually left the system after having vio_bus_remove called.
392 */
393static void vio_cmo_balance(struct work_struct *work)
394{
395 struct vio_cmo *cmo;
396 struct vio_dev *viodev;
397 struct vio_cmo_dev_entry *dev_ent;
398 unsigned long flags;
399 size_t avail = 0, level, chunk, need;
400 int devcount = 0, fulfilled;
401
402 cmo = container_of(work, struct vio_cmo, balance_q.work);
403
404 spin_lock_irqsave(&vio_cmo.lock, flags);
405
406 /* Calculate minimum entitlement and fulfill spare */
407 cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
408 BUG_ON(cmo->min > cmo->entitled);
409 cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
410 cmo->min += cmo->spare;
411 cmo->desired = cmo->min;
412
413 /*
414 * Determine how much entitlement is available and reset device
415 * entitlements
416 */
417 avail = cmo->entitled - cmo->spare;
418 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
419 viodev = dev_ent->viodev;
420 devcount++;
421 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
422 cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
423 avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
424 }
425
426 /*
427 * Having provided each device with the minimum entitlement, loop
428 * over the devices portioning out the remaining entitlement
429 * until there is nothing left.
430 */
431 level = VIO_CMO_MIN_ENT;
432 while (avail) {
433 fulfilled = 0;
434 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
435 viodev = dev_ent->viodev;
436
437 if (viodev->cmo.desired <= level) {
438 fulfilled++;
439 continue;
440 }
441
442 /*
443 * Give the device up to VIO_CMO_BALANCE_CHUNK
444 * bytes of entitlement, but do not exceed the
445 * desired level of entitlement for the device.
446 */
447 chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
448 chunk = min(chunk, (viodev->cmo.desired -
449 viodev->cmo.entitled));
450 viodev->cmo.entitled += chunk;
451
452 /*
453 * If the memory for this entitlement increase was
454 * already allocated to the device it does not come
455 * from the available pool being portioned out.
456 */
457 need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
458 max(viodev->cmo.allocated, level);
459 avail -= need;
460
461 }
462 if (fulfilled == devcount)
463 break;
464 level += VIO_CMO_BALANCE_CHUNK;
465 }
466
467 /* Calculate new reserve and excess pool sizes */
468 cmo->reserve.size = cmo->min;
469 cmo->excess.free = 0;
470 cmo->excess.size = 0;
471 need = 0;
472 list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
473 viodev = dev_ent->viodev;
474 /* Calculated reserve size above the minimum entitlement */
475 if (viodev->cmo.entitled)
476 cmo->reserve.size += (viodev->cmo.entitled -
477 VIO_CMO_MIN_ENT);
478 /* Calculated used excess entitlement */
479 if (viodev->cmo.allocated > viodev->cmo.entitled)
480 need += viodev->cmo.allocated - viodev->cmo.entitled;
481 }
482 cmo->excess.size = cmo->entitled - cmo->reserve.size;
483 cmo->excess.free = cmo->excess.size - need;
484
485 cancel_delayed_work(container_of(work, struct delayed_work, work));
486 spin_unlock_irqrestore(&vio_cmo.lock, flags);
487}
488
489static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
490 dma_addr_t *dma_handle, gfp_t flag)
491{
492 struct vio_dev *viodev = to_vio_dev(dev);
493 void *ret;
494
495 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
496 atomic_inc(&viodev->cmo.allocs_failed);
497 return NULL;
498 }
499
500 ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
501 if (unlikely(ret == NULL)) {
502 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
503 atomic_inc(&viodev->cmo.allocs_failed);
504 }
505
506 return ret;
507}
508
509static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
510 void *vaddr, dma_addr_t dma_handle)
511{
512 struct vio_dev *viodev = to_vio_dev(dev);
513
514 dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
515
516 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
517}
518
519static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
520 size_t size,
521 enum dma_data_direction direction,
522 struct dma_attrs *attrs)
523{
524 struct vio_dev *viodev = to_vio_dev(dev);
525 dma_addr_t ret = DMA_ERROR_CODE;
526
527 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
528 atomic_inc(&viodev->cmo.allocs_failed);
529 return ret;
530 }
531
532 ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
533 if (unlikely(dma_mapping_error(ret))) {
534 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
535 atomic_inc(&viodev->cmo.allocs_failed);
536 }
537
538 return ret;
539}
540
541static void vio_dma_iommu_unmap_single(struct device *dev,
542 dma_addr_t dma_handle, size_t size,
543 enum dma_data_direction direction,
544 struct dma_attrs *attrs)
545{
546 struct vio_dev *viodev = to_vio_dev(dev);
547
548 dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
549
550 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
551}
552
553static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
554 int nelems, enum dma_data_direction direction,
555 struct dma_attrs *attrs)
556{
557 struct vio_dev *viodev = to_vio_dev(dev);
558 struct scatterlist *sgl;
559 int ret, count = 0;
560 size_t alloc_size = 0;
561
562 for (sgl = sglist; count < nelems; count++, sgl++)
563 alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
564
565 if (vio_cmo_alloc(viodev, alloc_size)) {
566 atomic_inc(&viodev->cmo.allocs_failed);
567 return 0;
568 }
569
570 ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
571
572 if (unlikely(!ret)) {
573 vio_cmo_dealloc(viodev, alloc_size);
574 atomic_inc(&viodev->cmo.allocs_failed);
575 }
576
577 for (sgl = sglist, count = 0; count < ret; count++, sgl++)
578 alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
579 if (alloc_size)
580 vio_cmo_dealloc(viodev, alloc_size);
581
582 return ret;
583}
584
585static void vio_dma_iommu_unmap_sg(struct device *dev,
586 struct scatterlist *sglist, int nelems,
587 enum dma_data_direction direction,
588 struct dma_attrs *attrs)
589{
590 struct vio_dev *viodev = to_vio_dev(dev);
591 struct scatterlist *sgl;
592 size_t alloc_size = 0;
593 int count = 0;
594
595 for (sgl = sglist; count < nelems; count++, sgl++)
596 alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
597
598 dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
599
600 vio_cmo_dealloc(viodev, alloc_size);
601}
602
603struct dma_mapping_ops vio_dma_mapping_ops = {
604 .alloc_coherent = vio_dma_iommu_alloc_coherent,
605 .free_coherent = vio_dma_iommu_free_coherent,
606 .map_single = vio_dma_iommu_map_single,
607 .unmap_single = vio_dma_iommu_unmap_single,
608 .map_sg = vio_dma_iommu_map_sg,
609 .unmap_sg = vio_dma_iommu_unmap_sg,
610};
611
612/**
613 * vio_cmo_set_dev_desired - Set desired entitlement for a device
614 *
615 * @viodev: struct vio_dev for device to alter
616 * @new_desired: new desired entitlement level in bytes
617 *
618 * For use by devices to request a change to their entitlement at runtime or
619 * through sysfs. The desired entitlement level is changed and a balancing
620 * of system resources is scheduled to run in the future.
621 */
622void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
623{
624 unsigned long flags;
625 struct vio_cmo_dev_entry *dev_ent;
626 int found = 0;
627
628 if (!firmware_has_feature(FW_FEATURE_CMO))
629 return;
630
631 spin_lock_irqsave(&vio_cmo.lock, flags);
632 if (desired < VIO_CMO_MIN_ENT)
633 desired = VIO_CMO_MIN_ENT;
634
635 /*
636 * Changes will not be made for devices not in the device list.
637 * If it is not in the device list, then no driver is loaded
638 * for the device and it can not receive entitlement.
639 */
640 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
641 if (viodev == dev_ent->viodev) {
642 found = 1;
643 break;
644 }
645 if (!found)
646 return;
647
648 /* Increase/decrease in desired device entitlement */
649 if (desired >= viodev->cmo.desired) {
650 /* Just bump the bus and device values prior to a balance*/
651 vio_cmo.desired += desired - viodev->cmo.desired;
652 viodev->cmo.desired = desired;
653 } else {
654 /* Decrease bus and device values for desired entitlement */
655 vio_cmo.desired -= viodev->cmo.desired - desired;
656 viodev->cmo.desired = desired;
657 /*
658 * If less entitlement is desired than current entitlement, move
659 * any reserve memory in the change region to the excess pool.
660 */
661 if (viodev->cmo.entitled > desired) {
662 vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
663 vio_cmo.excess.size += viodev->cmo.entitled - desired;
664 /*
665 * If entitlement moving from the reserve pool to the
666 * excess pool is currently unused, add to the excess
667 * free counter.
668 */
669 if (viodev->cmo.allocated < viodev->cmo.entitled)
670 vio_cmo.excess.free += viodev->cmo.entitled -
671 max(viodev->cmo.allocated, desired);
672 viodev->cmo.entitled = desired;
673 }
674 }
675 schedule_delayed_work(&vio_cmo.balance_q, 0);
676 spin_unlock_irqrestore(&vio_cmo.lock, flags);
677}
678
679/**
680 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
681 *
682 * @viodev - Pointer to struct vio_dev for device
683 *
684 * Determine the devices IO memory entitlement needs, attempting
685 * to satisfy the system minimum entitlement at first and scheduling
686 * a balance operation to take care of the rest at a later time.
687 *
688 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
689 * -ENOMEM when entitlement is not available for device or
690 * device entry.
691 *
692 */
693static int vio_cmo_bus_probe(struct vio_dev *viodev)
694{
695 struct vio_cmo_dev_entry *dev_ent;
696 struct device *dev = &viodev->dev;
697 struct vio_driver *viodrv = to_vio_driver(dev->driver);
698 unsigned long flags;
699 size_t size;
700
701 /*
702 * Check to see that device has a DMA window and configure
703 * entitlement for the device.
704 */
705 if (of_get_property(viodev->dev.archdata.of_node,
706 "ibm,my-dma-window", NULL)) {
707 /* Check that the driver is CMO enabled and get desired DMA */
708 if (!viodrv->get_desired_dma) {
709 dev_err(dev, "%s: device driver does not support CMO\n",
710 __func__);
711 return -EINVAL;
712 }
713
714 viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
715 if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
716 viodev->cmo.desired = VIO_CMO_MIN_ENT;
717 size = VIO_CMO_MIN_ENT;
718
719 dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
720 GFP_KERNEL);
721 if (!dev_ent)
722 return -ENOMEM;
723
724 dev_ent->viodev = viodev;
725 spin_lock_irqsave(&vio_cmo.lock, flags);
726 list_add(&dev_ent->list, &vio_cmo.device_list);
727 } else {
728 viodev->cmo.desired = 0;
729 size = 0;
730 spin_lock_irqsave(&vio_cmo.lock, flags);
731 }
732
733 /*
734 * If the needs for vio_cmo.min have not changed since they
735 * were last set, the number of devices in the OF tree has
736 * been constant and the IO memory for this is already in
737 * the reserve pool.
738 */
739 if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
740 VIO_CMO_MIN_ENT)) {
741 /* Updated desired entitlement if device requires it */
742 if (size)
743 vio_cmo.desired += (viodev->cmo.desired -
744 VIO_CMO_MIN_ENT);
745 } else {
746 size_t tmp;
747
748 tmp = vio_cmo.spare + vio_cmo.excess.free;
749 if (tmp < size) {
750 dev_err(dev, "%s: insufficient free "
751 "entitlement to add device. "
752 "Need %lu, have %lu\n", __func__,
753 size, (vio_cmo.spare + tmp));
754 spin_unlock_irqrestore(&vio_cmo.lock, flags);
755 return -ENOMEM;
756 }
757
758 /* Use excess pool first to fulfill request */
759 tmp = min(size, vio_cmo.excess.free);
760 vio_cmo.excess.free -= tmp;
761 vio_cmo.excess.size -= tmp;
762 vio_cmo.reserve.size += tmp;
763
764 /* Use spare if excess pool was insufficient */
765 vio_cmo.spare -= size - tmp;
766
767 /* Update bus accounting */
768 vio_cmo.min += size;
769 vio_cmo.desired += viodev->cmo.desired;
770 }
771 spin_unlock_irqrestore(&vio_cmo.lock, flags);
772 return 0;
773}
774
775/**
776 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
777 *
778 * @viodev - Pointer to struct vio_dev for device
779 *
780 * Remove the device from the cmo device list. The minimum entitlement
781 * will be reserved for the device as long as it is in the system. The
782 * rest of the entitlement the device had been allocated will be returned
783 * to the system.
784 */
785static void vio_cmo_bus_remove(struct vio_dev *viodev)
786{
787 struct vio_cmo_dev_entry *dev_ent;
788 unsigned long flags;
789 size_t tmp;
790
791 spin_lock_irqsave(&vio_cmo.lock, flags);
792 if (viodev->cmo.allocated) {
793 dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
794 "allocated after remove operation.\n",
795 __func__, viodev->cmo.allocated);
796 BUG();
797 }
798
799 /*
800 * Remove the device from the device list being maintained for
801 * CMO enabled devices.
802 */
803 list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
804 if (viodev == dev_ent->viodev) {
805 list_del(&dev_ent->list);
806 kfree(dev_ent);
807 break;
808 }
809
810 /*
811 * Devices may not require any entitlement and they do not need
812 * to be processed. Otherwise, return the device's entitlement
813 * back to the pools.
814 */
815 if (viodev->cmo.entitled) {
816 /*
817 * This device has not yet left the OF tree, it's
818 * minimum entitlement remains in vio_cmo.min and
819 * vio_cmo.desired
820 */
821 vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
822
823 /*
824 * Save min allocation for device in reserve as long
825 * as it exists in OF tree as determined by later
826 * balance operation
827 */
828 viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
829
830 /* Replenish spare from freed reserve pool */
831 if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
832 tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
833 vio_cmo.spare));
834 vio_cmo.spare += tmp;
835 viodev->cmo.entitled -= tmp;
836 }
837
838 /* Remaining reserve goes to excess pool */
839 vio_cmo.excess.size += viodev->cmo.entitled;
840 vio_cmo.excess.free += viodev->cmo.entitled;
841 vio_cmo.reserve.size -= viodev->cmo.entitled;
842
843 /*
844 * Until the device is removed it will keep a
845 * minimum entitlement; this will guarantee that
846 * a module unload/load will result in a success.
847 */
848 viodev->cmo.entitled = VIO_CMO_MIN_ENT;
849 viodev->cmo.desired = VIO_CMO_MIN_ENT;
850 atomic_set(&viodev->cmo.allocs_failed, 0);
851 }
852
853 spin_unlock_irqrestore(&vio_cmo.lock, flags);
854}
855
856static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
857{
858 vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
859 viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
860}
861
862/**
863 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
864 *
865 * Set up the reserve and excess entitlement pools based on available
866 * system entitlement and the number of devices in the OF tree that
867 * require entitlement in the reserve pool.
868 */
869static void vio_cmo_bus_init(void)
870{
871 struct hvcall_mpp_data mpp_data;
872 int err;
873
874 memset(&vio_cmo, 0, sizeof(struct vio_cmo));
875 spin_lock_init(&vio_cmo.lock);
876 INIT_LIST_HEAD(&vio_cmo.device_list);
877 INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
878
879 /* Get current system entitlement */
880 err = h_get_mpp(&mpp_data);
881
882 /*
883 * On failure, continue with entitlement set to 0, will panic()
884 * later when spare is reserved.
885 */
886 if (err != H_SUCCESS) {
887 printk(KERN_ERR "%s: unable to determine system IO "\
888 "entitlement. (%d)\n", __func__, err);
889 vio_cmo.entitled = 0;
890 } else {
891 vio_cmo.entitled = mpp_data.entitled_mem;
892 }
893
894 /* Set reservation and check against entitlement */
895 vio_cmo.spare = VIO_CMO_MIN_ENT;
896 vio_cmo.reserve.size = vio_cmo.spare;
897 vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
898 VIO_CMO_MIN_ENT);
899 if (vio_cmo.reserve.size > vio_cmo.entitled) {
900 printk(KERN_ERR "%s: insufficient system entitlement\n",
901 __func__);
902 panic("%s: Insufficient system entitlement", __func__);
903 }
904
905 /* Set the remaining accounting variables */
906 vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
907 vio_cmo.excess.free = vio_cmo.excess.size;
908 vio_cmo.min = vio_cmo.reserve.size;
909 vio_cmo.desired = vio_cmo.reserve.size;
910}
911
912/* sysfs device functions and data structures for CMO */
913
914#define viodev_cmo_rd_attr(name) \
915static ssize_t viodev_cmo_##name##_show(struct device *dev, \
916 struct device_attribute *attr, \
917 char *buf) \
918{ \
919 return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
920}
921
922static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
923 struct device_attribute *attr, char *buf)
924{
925 struct vio_dev *viodev = to_vio_dev(dev);
926 return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
927}
928
929static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
930 struct device_attribute *attr, const char *buf, size_t count)
931{
932 struct vio_dev *viodev = to_vio_dev(dev);
933 atomic_set(&viodev->cmo.allocs_failed, 0);
934 return count;
935}
936
937static ssize_t viodev_cmo_desired_set(struct device *dev,
938 struct device_attribute *attr, const char *buf, size_t count)
939{
940 struct vio_dev *viodev = to_vio_dev(dev);
941 size_t new_desired;
942 int ret;
943
944 ret = strict_strtoul(buf, 10, &new_desired);
945 if (ret)
946 return ret;
947
948 vio_cmo_set_dev_desired(viodev, new_desired);
949 return count;
950}
951
952viodev_cmo_rd_attr(desired);
953viodev_cmo_rd_attr(entitled);
954viodev_cmo_rd_attr(allocated);
955
956static ssize_t name_show(struct device *, struct device_attribute *, char *);
957static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
958static struct device_attribute vio_cmo_dev_attrs[] = {
959 __ATTR_RO(name),
960 __ATTR_RO(devspec),
961 __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
962 viodev_cmo_desired_show, viodev_cmo_desired_set),
963 __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
964 __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
965 __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
966 viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
967 __ATTR_NULL
968};
969
970/* sysfs bus functions and data structures for CMO */
971
972#define viobus_cmo_rd_attr(name) \
973static ssize_t \
974viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
975{ \
976 return sprintf(buf, "%lu\n", vio_cmo.name); \
977}
978
979#define viobus_cmo_pool_rd_attr(name, var) \
980static ssize_t \
981viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
982{ \
983 return sprintf(buf, "%lu\n", vio_cmo.name.var); \
984}
985
986static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
987 size_t count)
988{
989 unsigned long flags;
990
991 spin_lock_irqsave(&vio_cmo.lock, flags);
992 vio_cmo.high = vio_cmo.curr;
993 spin_unlock_irqrestore(&vio_cmo.lock, flags);
994
995 return count;
996}
997
998viobus_cmo_rd_attr(entitled);
999viobus_cmo_pool_rd_attr(reserve, size);
1000viobus_cmo_pool_rd_attr(excess, size);
1001viobus_cmo_pool_rd_attr(excess, free);
1002viobus_cmo_rd_attr(spare);
1003viobus_cmo_rd_attr(min);
1004viobus_cmo_rd_attr(desired);
1005viobus_cmo_rd_attr(curr);
1006viobus_cmo_rd_attr(high);
1007
1008static struct bus_attribute vio_cmo_bus_attrs[] = {
1009 __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
1010 __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
1011 __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
1012 __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
1013 __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
1014 __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
1015 __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
1016 __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
1017 __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
1018 viobus_cmo_high_show, viobus_cmo_high_reset),
1019 __ATTR_NULL
1020};
1021
1022static void vio_cmo_sysfs_init(void)
1023{
1024 vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
1025 vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
1026}
1027#else /* CONFIG_PPC_SMLPAR */
1028/* Dummy functions for iSeries platform */
1029int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1030void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1031static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1032static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1033static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1034static void vio_cmo_bus_init() {}
1035static void vio_cmo_sysfs_init() { }
1036#endif /* CONFIG_PPC_SMLPAR */
1037EXPORT_SYMBOL(vio_cmo_entitlement_update);
1038EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1039
49static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) 1040static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
50{ 1041{
51 const unsigned char *dma_window; 1042 const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
114 return error; 1105 return error;
115 1106
116 id = vio_match_device(viodrv->id_table, viodev); 1107 id = vio_match_device(viodrv->id_table, viodev);
117 if (id) 1108 if (id) {
1109 memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1110 if (firmware_has_feature(FW_FEATURE_CMO)) {
1111 error = vio_cmo_bus_probe(viodev);
1112 if (error)
1113 return error;
1114 }
118 error = viodrv->probe(viodev, id); 1115 error = viodrv->probe(viodev, id);
1116 if (error)
1117 vio_cmo_bus_remove(viodev);
1118 }
119 1119
120 return error; 1120 return error;
121} 1121}
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
125{ 1125{
126 struct vio_dev *viodev = to_vio_dev(dev); 1126 struct vio_dev *viodev = to_vio_dev(dev);
127 struct vio_driver *viodrv = to_vio_driver(dev->driver); 1127 struct vio_driver *viodrv = to_vio_driver(dev->driver);
1128 struct device *devptr;
1129 int ret = 1;
1130
1131 /*
1132 * Hold a reference to the device after the remove function is called
1133 * to allow for CMO accounting cleanup for the device.
1134 */
1135 devptr = get_device(dev);
128 1136
129 if (viodrv->remove) 1137 if (viodrv->remove)
130 return viodrv->remove(viodev); 1138 ret = viodrv->remove(viodev);
1139
1140 if (!ret && firmware_has_feature(FW_FEATURE_CMO))
1141 vio_cmo_bus_remove(viodev);
131 1142
132 /* driver can't remove */ 1143 put_device(devptr);
133 return 1; 1144 return ret;
134} 1145}
135 1146
136/** 1147/**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
215 viodev->unit_address = *unit_address; 1226 viodev->unit_address = *unit_address;
216 } 1227 }
217 viodev->dev.archdata.of_node = of_node_get(of_node); 1228 viodev->dev.archdata.of_node = of_node_get(of_node);
218 viodev->dev.archdata.dma_ops = &dma_iommu_ops; 1229
1230 if (firmware_has_feature(FW_FEATURE_CMO))
1231 vio_cmo_set_dma_ops(viodev);
1232 else
1233 viodev->dev.archdata.dma_ops = &dma_iommu_ops;
219 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); 1234 viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
220 viodev->dev.archdata.numa_node = of_node_to_nid(of_node); 1235 viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
221 1236
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
245 int err; 1260 int err;
246 struct device_node *node_vroot; 1261 struct device_node *node_vroot;
247 1262
1263 if (firmware_has_feature(FW_FEATURE_CMO))
1264 vio_cmo_sysfs_init();
1265
248 err = bus_register(&vio_bus_type); 1266 err = bus_register(&vio_bus_type);
249 if (err) { 1267 if (err) {
250 printk(KERN_ERR "failed to register VIO bus\n"); 1268 printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
262 return err; 1280 return err;
263 } 1281 }
264 1282
1283 if (firmware_has_feature(FW_FEATURE_CMO))
1284 vio_cmo_bus_init();
1285
265 node_vroot = of_find_node_by_name(NULL, "vdevice"); 1286 node_vroot = of_find_node_by_name(NULL, "vdevice");
266 if (node_vroot) { 1287 if (node_vroot) {
267 struct device_node *of_node; 1288 struct device_node *of_node;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index a914411bced5..4a8ce62fe112 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
85 85
86 /* The dummy segment contents for the bug workaround mentioned above 86 /* The dummy segment contents for the bug workaround mentioned above
87 near PHDRS. */ 87 near PHDRS. */
88 .dummy : { 88 .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
89 LONG(0xf177) 89 LONG(0xf177)
90 } :kernel :dummy 90 } :kernel :dummy
91 91
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331fc..565b7a237c84 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
100 return 0; 100 return 0;
101} 101}
102 102
103#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
104static void do_dabr(struct pt_regs *regs, unsigned long address,
105 unsigned long error_code)
106{
107 siginfo_t info;
108
109 if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
110 11, SIGSEGV) == NOTIFY_STOP)
111 return;
112
113 if (debugger_dabr_match(regs))
114 return;
115
116 /* Clear the DABR */
117 set_dabr(0);
118
119 /* Deliver the signal to userspace */
120 info.si_signo = SIGTRAP;
121 info.si_errno = 0;
122 info.si_code = TRAP_HWBKPT;
123 info.si_addr = (void __user *)address;
124 force_sig_info(SIGTRAP, &info, current);
125}
126#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
127
128/* 103/*
129 * For 600- and 800-family processors, the error_code parameter is DSISR 104 * For 600- and 800-family processors, the error_code parameter is DSISR
130 * for a data fault, SRR1 for an instruction fault. For 400-family processors 105 * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index ccbd4958412e..696a5ee4962d 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
1config PPC_MPC52xx 1config PPC_MPC52xx
2 bool "52xx-based boards" 2 bool "52xx-based boards"
3 depends on PPC_MULTIPLATFORM && PPC32 3 depends on PPC_MULTIPLATFORM && PPC32
4 select FSL_SOC
5 select PPC_CLOCK 4 select PPC_CLOCK
6 select PPC_PCI_CHOICE 5 select PPC_PCI_CHOICE
7 6
@@ -49,5 +48,6 @@ config PPC_MPC5200_GPIO
49 bool "MPC5200 GPIO support" 48 bool "MPC5200 GPIO support"
50 depends on PPC_MPC52xx 49 depends on PPC_MPC52xx
51 select ARCH_REQUIRE_GPIOLIB 50 select ARCH_REQUIRE_GPIOLIB
51 select GENERIC_GPIO
52 help 52 help
53 Enable gpiolib support for mpc5200 based boards 53 Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 208005ca262c..e06420af5fe9 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
172 } 172 }
173} 173}
174 174
175static void tce_build_cell(struct iommu_table *tbl, long index, long npages, 175static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
176 unsigned long uaddr, enum dma_data_direction direction, 176 unsigned long uaddr, enum dma_data_direction direction,
177 struct dma_attrs *attrs) 177 struct dma_attrs *attrs)
178{ 178{
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
213 213
214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", 214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
215 index, npages, direction, base_pte); 215 index, npages, direction, base_pte);
216 return 0;
216} 217}
217 218
218static void tce_free_cell(struct iommu_table *tbl, long index, long npages) 219static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
1150 1151
1151static int __init setup_iommu_fixed(char *str) 1152static int __init setup_iommu_fixed(char *str)
1152{ 1153{
1154 struct device_node *pciep;
1155
1153 if (strcmp(str, "off") == 0) 1156 if (strcmp(str, "off") == 0)
1154 iommu_fixed_disabled = 1; 1157 iommu_fixed_disabled = 1;
1155 1158
1156 else if (strcmp(str, "weak") == 0) 1159 /* If we can find a pcie-endpoint in the device tree assume that
1160 * we're on a triblade or a CAB so by default the fixed mapping
1161 * should be set to be weakly ordered; but only if the boot
1162 * option WASN'T set for strong ordering
1163 */
1164 pciep = of_find_node_by_type(NULL, "pcie-endpoint");
1165
1166 if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
1157 iommu_fixed_is_weak = 1; 1167 iommu_fixed_is_weak = 1;
1158 1168
1169 of_node_put(pciep);
1170
1159 return 1; 1171 return 1;
1160} 1172}
1161__setup("iommu_fixed=", setup_iommu_fixed); 1173__setup("iommu_fixed=", setup_iommu_fixed);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 34654743363d..2deeeba7eccf 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,11 +312,28 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
312 */ 312 */
313 node = cpu_to_node(raw_smp_processor_id()); 313 node = cpu_to_node(raw_smp_processor_id());
314 for (n = 0; n < MAX_NUMNODES; n++, node++) { 314 for (n = 0; n < MAX_NUMNODES; n++, node++) {
315 int available_spus;
316
315 node = (node < MAX_NUMNODES) ? node : 0; 317 node = (node < MAX_NUMNODES) ? node : 0;
316 if (!node_allowed(ctx, node)) 318 if (!node_allowed(ctx, node))
317 continue; 319 continue;
320
321 available_spus = 0;
318 mutex_lock(&cbe_spu_info[node].list_mutex); 322 mutex_lock(&cbe_spu_info[node].list_mutex);
319 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 323 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
324 if (spu->ctx && spu->ctx->gang
325 && spu->ctx->aff_offset == 0)
326 available_spus -=
327 (spu->ctx->gang->contexts - 1);
328 else
329 available_spus++;
330 }
331 if (available_spus < ctx->gang->contexts) {
332 mutex_unlock(&cbe_spu_info[node].list_mutex);
333 continue;
334 }
335
336 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
320 if ((!mem_aff || spu->has_mem_affinity) && 337 if ((!mem_aff || spu->has_mem_affinity) &&
321 sched_spu(spu)) { 338 sched_spu(spu)) {
322 mutex_unlock(&cbe_spu_info[node].list_mutex); 339 mutex_unlock(&cbe_spu_info[node].list_mutex);
@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
389 if (list_empty(&ctx->aff_list)) 406 if (list_empty(&ctx->aff_list))
390 return 0; 407 return 0;
391 408
409 if (atomic_read(&ctx->gang->aff_sched_count) == 0)
410 ctx->gang->aff_ref_spu = NULL;
411
392 if (!gang->aff_ref_spu) { 412 if (!gang->aff_ref_spu) {
393 if (!(gang->aff_flags & AFF_MERGED)) 413 if (!(gang->aff_flags & AFF_MERGED))
394 aff_merge_remaining_ctxs(gang); 414 aff_merge_remaining_ctxs(gang);
@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
416 if (spu->ctx->flags & SPU_CREATE_NOSCHED) 436 if (spu->ctx->flags & SPU_CREATE_NOSCHED)
417 atomic_dec(&cbe_spu_info[spu->node].reserved_spus); 437 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
418 438
419 if (ctx->gang){ 439 if (ctx->gang)
420 mutex_lock(&ctx->gang->aff_mutex); 440 atomic_dec_if_positive(&ctx->gang->aff_sched_count);
421 if (has_affinity(ctx)) {
422 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
423 ctx->gang->aff_ref_spu = NULL;
424 }
425 mutex_unlock(&ctx->gang->aff_mutex);
426 }
427 441
428 spu_switch_notify(spu, NULL); 442 spu_switch_notify(spu, NULL);
429 spu_unmap_mappings(ctx); 443 spu_unmap_mappings(ctx);
@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
562 goto found; 576 goto found;
563 mutex_unlock(&cbe_spu_info[node].list_mutex); 577 mutex_unlock(&cbe_spu_info[node].list_mutex);
564 578
565 mutex_lock(&ctx->gang->aff_mutex); 579 atomic_dec(&ctx->gang->aff_sched_count);
566 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
567 ctx->gang->aff_ref_spu = NULL;
568 mutex_unlock(&ctx->gang->aff_mutex);
569 goto not_found; 580 goto not_found;
570 } 581 }
571 mutex_unlock(&ctx->gang->aff_mutex); 582 mutex_unlock(&ctx->gang->aff_mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 8c0e95766a62..92d20e993ede 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
196 struct proc_dir_entry *entry; 196 struct proc_dir_entry *entry;
197 int i, error = -ENOMEM; 197 int i, error = -ENOMEM;
198 198
199 sputrace_log = kcalloc(sizeof(struct sputrace), 199 sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
200 bufsize, GFP_KERNEL);
201 if (!sputrace_log) 200 if (!sputrace_log)
202 goto out; 201 goto out;
203 202
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index bc818e4e2033..bb464d1211b2 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
41#include <asm/iseries/hv_call_event.h> 41#include <asm/iseries/hv_call_event.h>
42#include <asm/iseries/iommu.h> 42#include <asm/iseries/iommu.h>
43 43
44static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, 44static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
45 unsigned long uaddr, enum dma_data_direction direction, 45 unsigned long uaddr, enum dma_data_direction direction,
46 struct dma_attrs *attrs) 46 struct dma_attrs *attrs)
47{ 47{
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
71 index++; 71 index++;
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 } 73 }
74 return 0;
74} 75}
75 76
76static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) 77static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 70541b7a5013..a0ff03a3d8da 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
83static struct iommu_table iommu_table_iobmap; 83static struct iommu_table iommu_table_iobmap;
84static int iommu_table_iobmap_inited; 84static int iommu_table_iobmap_inited;
85 85
86static void iobmap_build(struct iommu_table *tbl, long index, 86static int iobmap_build(struct iommu_table *tbl, long index,
87 long npages, unsigned long uaddr, 87 long npages, unsigned long uaddr,
88 enum dma_data_direction direction, 88 enum dma_data_direction direction,
89 struct dma_attrs *attrs) 89 struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
108 uaddr += IOBMAP_PAGE_SIZE; 108 uaddr += IOBMAP_PAGE_SIZE;
109 bus_addr += IOBMAP_PAGE_SIZE; 109 bus_addr += IOBMAP_PAGE_SIZE;
110 } 110 }
111 return 0;
111} 112}
112 113
113 114
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 757c0296e0b8..97619fd51e39 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
40 depends on PPC_PSERIES && PPC_EARLY_DEBUG 40 depends on PPC_PSERIES && PPC_EARLY_DEBUG
41 bool "Enable extra debug logging in platforms/pseries" 41 bool "Enable extra debug logging in platforms/pseries"
42 default y 42 default y
43
44config PPC_SMLPAR
45 bool "Support for shared-memory logical partitions"
46 depends on PPC_PSERIES
47 select LPARCFG
48 default n
49 help
50 Select this option to enable shared memory partition support.
51 With this option a system running in an LPAR can be given more
52 memory than physically available and will allow firmware to
53 balance memory across many LPARs.
54
55config CMM
56 tristate "Collaborative memory management"
57 depends on PPC_SMLPAR
58 default y
59 help
60 Select this option, if you want to enable the kernel interface
61 to reduce the memory size of the system. This is accomplished
62 by allocating pages of memory and put them "on hold". This only
63 makes sense for a system running in an LPAR where the unused pages
64 will be reused for other LPARs. The interface allows firmware to
65 balance memory across many LPARs.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 554c6e42ef2a..dfe574af2dc0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
24obj-$(CONFIG_HVCS) += hvcserver.o 24obj-$(CONFIG_HVCS) += hvcserver.o
25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o 25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o 26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
27obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 000000000000..c6b3be03168b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
1/*
2 * Collaborative memory management interface.
3 *
4 * Copyright (C) 2008 IBM Corporation
5 * Author(s): Brian King (brking@linux.vnet.ibm.com),
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <linux/ctype.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/init.h>
28#include <linux/kthread.h>
29#include <linux/module.h>
30#include <linux/oom.h>
31#include <linux/sched.h>
32#include <linux/stringify.h>
33#include <linux/swap.h>
34#include <linux/sysdev.h>
35#include <asm/firmware.h>
36#include <asm/hvcall.h>
37#include <asm/mmu.h>
38#include <asm/pgalloc.h>
39#include <asm/uaccess.h>
40
41#include "plpar_wrappers.h"
42
43#define CMM_DRIVER_VERSION "1.0.0"
44#define CMM_DEFAULT_DELAY 1
45#define CMM_DEBUG 0
46#define CMM_DISABLE 0
47#define CMM_OOM_KB 1024
48#define CMM_MIN_MEM_MB 256
49#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
50#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
51
52static unsigned int delay = CMM_DEFAULT_DELAY;
53static unsigned int oom_kb = CMM_OOM_KB;
54static unsigned int cmm_debug = CMM_DEBUG;
55static unsigned int cmm_disabled = CMM_DISABLE;
56static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
57static struct sys_device cmm_sysdev;
58
59MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
60MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
61MODULE_LICENSE("GPL");
62MODULE_VERSION(CMM_DRIVER_VERSION);
63
64module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
65MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
66 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
67module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
68MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
69 "[Default=" __stringify(CMM_OOM_KB) "]");
70module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
71MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
72 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
73module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
74MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
75 "[Default=" __stringify(CMM_DEBUG) "]");
76
77#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
78
79#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
80
81struct cmm_page_array {
82 struct cmm_page_array *next;
83 unsigned long index;
84 unsigned long page[CMM_NR_PAGES];
85};
86
87static unsigned long loaned_pages;
88static unsigned long loaned_pages_target;
89static unsigned long oom_freed_pages;
90
91static struct cmm_page_array *cmm_page_list;
92static DEFINE_SPINLOCK(cmm_lock);
93
94static struct task_struct *cmm_thread_ptr;
95
96/**
97 * cmm_alloc_pages - Allocate pages and mark them as loaned
98 * @nr: number of pages to allocate
99 *
100 * Return value:
101 * number of pages requested to be allocated which were not
102 **/
103static long cmm_alloc_pages(long nr)
104{
105 struct cmm_page_array *pa, *npa;
106 unsigned long addr;
107 long rc;
108
109 cmm_dbg("Begin request for %ld pages\n", nr);
110
111 while (nr) {
112 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
113 __GFP_NORETRY | __GFP_NOMEMALLOC);
114 if (!addr)
115 break;
116 spin_lock(&cmm_lock);
117 pa = cmm_page_list;
118 if (!pa || pa->index >= CMM_NR_PAGES) {
119 /* Need a new page for the page list. */
120 spin_unlock(&cmm_lock);
121 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
122 __GFP_NORETRY | __GFP_NOMEMALLOC);
123 if (!npa) {
124 pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
125 free_page(addr);
126 break;
127 }
128 spin_lock(&cmm_lock);
129 pa = cmm_page_list;
130
131 if (!pa || pa->index >= CMM_NR_PAGES) {
132 npa->next = pa;
133 npa->index = 0;
134 pa = npa;
135 cmm_page_list = pa;
136 } else
137 free_page((unsigned long) npa);
138 }
139
140 if ((rc = plpar_page_set_loaned(__pa(addr)))) {
141 pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
142 spin_unlock(&cmm_lock);
143 free_page(addr);
144 break;
145 }
146
147 pa->page[pa->index++] = addr;
148 loaned_pages++;
149 totalram_pages--;
150 spin_unlock(&cmm_lock);
151 nr--;
152 }
153
154 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
155 return nr;
156}
157
158/**
159 * cmm_free_pages - Free pages and mark them as active
160 * @nr: number of pages to free
161 *
162 * Return value:
163 * number of pages requested to be freed which were not
164 **/
165static long cmm_free_pages(long nr)
166{
167 struct cmm_page_array *pa;
168 unsigned long addr;
169
170 cmm_dbg("Begin free of %ld pages.\n", nr);
171 spin_lock(&cmm_lock);
172 pa = cmm_page_list;
173 while (nr) {
174 if (!pa || pa->index <= 0)
175 break;
176 addr = pa->page[--pa->index];
177
178 if (pa->index == 0) {
179 pa = pa->next;
180 free_page((unsigned long) cmm_page_list);
181 cmm_page_list = pa;
182 }
183
184 plpar_page_set_active(__pa(addr));
185 free_page(addr);
186 loaned_pages--;
187 nr--;
188 totalram_pages++;
189 }
190 spin_unlock(&cmm_lock);
191 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
192 return nr;
193}
194
195/**
196 * cmm_oom_notify - OOM notifier
197 * @self: notifier block struct
198 * @dummy: not used
199 * @parm: returned - number of pages freed
200 *
201 * Return value:
202 * NOTIFY_OK
203 **/
204static int cmm_oom_notify(struct notifier_block *self,
205 unsigned long dummy, void *parm)
206{
207 unsigned long *freed = parm;
208 long nr = KB2PAGES(oom_kb);
209
210 cmm_dbg("OOM processing started\n");
211 nr = cmm_free_pages(nr);
212 loaned_pages_target = loaned_pages;
213 *freed += KB2PAGES(oom_kb) - nr;
214 oom_freed_pages += KB2PAGES(oom_kb) - nr;
215 cmm_dbg("OOM processing complete\n");
216 return NOTIFY_OK;
217}
218
219/**
220 * cmm_get_mpp - Read memory performance parameters
221 *
222 * Makes hcall to query the current page loan request from the hypervisor.
223 *
224 * Return value:
225 * nothing
226 **/
227static void cmm_get_mpp(void)
228{
229 int rc;
230 struct hvcall_mpp_data mpp_data;
231 unsigned long active_pages_target;
232 signed long page_loan_request;
233
234 rc = h_get_mpp(&mpp_data);
235
236 if (rc != H_SUCCESS)
237 return;
238
239 page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
240 loaned_pages_target = page_loan_request + loaned_pages;
241 if (loaned_pages_target > oom_freed_pages)
242 loaned_pages_target -= oom_freed_pages;
243 else
244 loaned_pages_target = 0;
245
246 active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
247
248 if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
249 loaned_pages_target = totalram_pages + loaned_pages -
250 ((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
251
252 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
253 page_loan_request, loaned_pages, loaned_pages_target,
254 oom_freed_pages, totalram_pages);
255}
256
257static struct notifier_block cmm_oom_nb = {
258 .notifier_call = cmm_oom_notify
259};
260
261/**
262 * cmm_thread - CMM task thread
263 * @dummy: not used
264 *
265 * Return value:
266 * 0
267 **/
268static int cmm_thread(void *dummy)
269{
270 unsigned long timeleft;
271
272 while (1) {
273 timeleft = msleep_interruptible(delay * 1000);
274
275 if (kthread_should_stop() || timeleft) {
276 loaned_pages_target = loaned_pages;
277 break;
278 }
279
280 cmm_get_mpp();
281
282 if (loaned_pages_target > loaned_pages) {
283 if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
284 loaned_pages_target = loaned_pages;
285 } else if (loaned_pages_target < loaned_pages)
286 cmm_free_pages(loaned_pages - loaned_pages_target);
287 }
288 return 0;
289}
290
291#define CMM_SHOW(name, format, args...) \
292 static ssize_t show_##name(struct sys_device *dev, char *buf) \
293 { \
294 return sprintf(buf, format, ##args); \
295 } \
296 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
297
298CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
299CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
300
301static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
302{
303 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
304}
305
306static ssize_t store_oom_pages(struct sys_device *dev,
307 const char *buf, size_t count)
308{
309 unsigned long val = simple_strtoul (buf, NULL, 10);
310
311 if (!capable(CAP_SYS_ADMIN))
312 return -EPERM;
313 if (val != 0)
314 return -EBADMSG;
315
316 oom_freed_pages = 0;
317 return count;
318}
319
320static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
321 show_oom_pages, store_oom_pages);
322
323static struct sysdev_attribute *cmm_attrs[] = {
324 &attr_loaned_kb,
325 &attr_loaned_target_kb,
326 &attr_oom_freed_kb,
327};
328
329static struct sysdev_class cmm_sysdev_class = {
330 .name = "cmm",
331};
332
333/**
334 * cmm_sysfs_register - Register with sysfs
335 *
336 * Return value:
337 * 0 on success / other on failure
338 **/
339static int cmm_sysfs_register(struct sys_device *sysdev)
340{
341 int i, rc;
342
343 if ((rc = sysdev_class_register(&cmm_sysdev_class)))
344 return rc;
345
346 sysdev->id = 0;
347 sysdev->cls = &cmm_sysdev_class;
348
349 if ((rc = sysdev_register(sysdev)))
350 goto class_unregister;
351
352 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
353 if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
354 goto fail;
355 }
356
357 return 0;
358
359fail:
360 while (--i >= 0)
361 sysdev_remove_file(sysdev, cmm_attrs[i]);
362 sysdev_unregister(sysdev);
363class_unregister:
364 sysdev_class_unregister(&cmm_sysdev_class);
365 return rc;
366}
367
368/**
369 * cmm_unregister_sysfs - Unregister from sysfs
370 *
371 **/
372static void cmm_unregister_sysfs(struct sys_device *sysdev)
373{
374 int i;
375
376 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
377 sysdev_remove_file(sysdev, cmm_attrs[i]);
378 sysdev_unregister(sysdev);
379 sysdev_class_unregister(&cmm_sysdev_class);
380}
381
382/**
383 * cmm_init - Module initialization
384 *
385 * Return value:
386 * 0 on success / other on failure
387 **/
388static int cmm_init(void)
389{
390 int rc = -ENOMEM;
391
392 if (!firmware_has_feature(FW_FEATURE_CMO))
393 return -EOPNOTSUPP;
394
395 if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
396 return rc;
397
398 if ((rc = cmm_sysfs_register(&cmm_sysdev)))
399 goto out_oom_notifier;
400
401 if (cmm_disabled)
402 return rc;
403
404 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
405 if (IS_ERR(cmm_thread_ptr)) {
406 rc = PTR_ERR(cmm_thread_ptr);
407 goto out_unregister_sysfs;
408 }
409
410 return rc;
411
412out_unregister_sysfs:
413 cmm_unregister_sysfs(&cmm_sysdev);
414out_oom_notifier:
415 unregister_oom_notifier(&cmm_oom_nb);
416 return rc;
417}
418
419/**
420 * cmm_exit - Module exit
421 *
422 * Return value:
423 * nothing
424 **/
425static void cmm_exit(void)
426{
427 if (cmm_thread_ptr)
428 kthread_stop(cmm_thread_ptr);
429 unregister_oom_notifier(&cmm_oom_nb);
430 cmm_free_pages(loaned_pages);
431 cmm_unregister_sysfs(&cmm_sysdev);
432}
433
434/**
435 * cmm_set_disable - Disable/Enable CMM
436 *
437 * Return value:
438 * 0 on success / other on failure
439 **/
440static int cmm_set_disable(const char *val, struct kernel_param *kp)
441{
442 int disable = simple_strtoul(val, NULL, 10);
443
444 if (disable != 0 && disable != 1)
445 return -EINVAL;
446
447 if (disable && !cmm_disabled) {
448 if (cmm_thread_ptr)
449 kthread_stop(cmm_thread_ptr);
450 cmm_thread_ptr = NULL;
451 cmm_free_pages(loaned_pages);
452 } else if (!disable && cmm_disabled) {
453 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
454 if (IS_ERR(cmm_thread_ptr))
455 return PTR_ERR(cmm_thread_ptr);
456 }
457
458 cmm_disabled = disable;
459 return 0;
460}
461
462module_param_call(disable, cmm_set_disable, param_get_uint,
463 &cmm_disabled, S_IRUGO | S_IWUSR);
464MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
465 "[Default=" __stringify(CMM_DISABLE) "]");
466
467module_init(cmm_init);
468module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 5377dd4b849a..a8c446697f9e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49 49
50 50
51static void tce_build_pSeries(struct iommu_table *tbl, long index, 51static int tce_build_pSeries(struct iommu_table *tbl, long index,
52 long npages, unsigned long uaddr, 52 long npages, unsigned long uaddr,
53 enum dma_data_direction direction, 53 enum dma_data_direction direction,
54 struct dma_attrs *attrs) 54 struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 tcep++; 73 tcep++;
74 } 74 }
75 return 0;
75} 76}
76 77
77 78
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
94 return *tcep; 95 return *tcep;
95} 96}
96 97
97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, 98static void tce_free_pSeriesLP(struct iommu_table*, long, long);
99static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
100
101static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
98 long npages, unsigned long uaddr, 102 long npages, unsigned long uaddr,
99 enum dma_data_direction direction, 103 enum dma_data_direction direction,
100 struct dma_attrs *attrs) 104 struct dma_attrs *attrs)
101{ 105{
102 u64 rc; 106 u64 rc = 0;
103 u64 proto_tce, tce; 107 u64 proto_tce, tce;
104 u64 rpn; 108 u64 rpn;
109 int ret = 0;
110 long tcenum_start = tcenum, npages_start = npages;
105 111
106 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; 112 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
107 proto_tce = TCE_PCI_READ; 113 proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
112 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; 118 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
113 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); 119 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
114 120
121 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
122 ret = (int)rc;
123 tce_free_pSeriesLP(tbl, tcenum_start,
124 (npages_start - (npages + 1)));
125 break;
126 }
127
115 if (rc && printk_ratelimit()) { 128 if (rc && printk_ratelimit()) {
116 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 129 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
117 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 130 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
123 tcenum++; 136 tcenum++;
124 rpn++; 137 rpn++;
125 } 138 }
139 return ret;
126} 140}
127 141
128static DEFINE_PER_CPU(u64 *, tce_page) = NULL; 142static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
129 143
130static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 144static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
131 long npages, unsigned long uaddr, 145 long npages, unsigned long uaddr,
132 enum dma_data_direction direction, 146 enum dma_data_direction direction,
133 struct dma_attrs *attrs) 147 struct dma_attrs *attrs)
134{ 148{
135 u64 rc; 149 u64 rc = 0;
136 u64 proto_tce; 150 u64 proto_tce;
137 u64 *tcep; 151 u64 *tcep;
138 u64 rpn; 152 u64 rpn;
139 long l, limit; 153 long l, limit;
154 long tcenum_start = tcenum, npages_start = npages;
155 int ret = 0;
140 156
141 if (npages == 1) { 157 if (npages == 1) {
142 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 158 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
143 direction, attrs); 159 direction, attrs);
144 return;
145 } 160 }
146 161
147 tcep = __get_cpu_var(tce_page); 162 tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
153 tcep = (u64 *)__get_free_page(GFP_ATOMIC); 168 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
154 /* If allocation fails, fall back to the loop implementation */ 169 /* If allocation fails, fall back to the loop implementation */
155 if (!tcep) { 170 if (!tcep) {
156 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 171 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
157 direction, attrs); 172 direction, attrs);
158 return;
159 } 173 }
160 __get_cpu_var(tce_page) = tcep; 174 __get_cpu_var(tce_page) = tcep;
161 } 175 }
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
187 tcenum += limit; 201 tcenum += limit;
188 } while (npages > 0 && !rc); 202 } while (npages > 0 && !rc);
189 203
204 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
205 ret = (int)rc;
206 tce_freemulti_pSeriesLP(tbl, tcenum_start,
207 (npages_start - (npages + limit)));
208 return ret;
209 }
210
190 if (rc && printk_ratelimit()) { 211 if (rc && printk_ratelimit()) {
191 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 212 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
192 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 213 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
194 printk("\ttce[0] val = 0x%lx\n", tcep[0]); 215 printk("\ttce[0] val = 0x%lx\n", tcep[0]);
195 show_stack(current, (unsigned long *)__get_SP()); 216 show_stack(current, (unsigned long *)__get_SP());
196 } 217 }
218 return ret;
197} 219}
198 220
199static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) 221static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d8680b589dc9..a437267c6bf8 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
42 return vpa_call(0x3, cpu, vpa); 42 return vpa_call(0x3, cpu, vpa);
43} 43}
44 44
45static inline long plpar_page_set_loaned(unsigned long vpa)
46{
47 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
48}
49
50static inline long plpar_page_set_active(unsigned long vpa)
51{
52 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
53}
54
45extern void vpa_init(int cpu); 55extern void vpa_init(int cpu);
46 56
47static inline long plpar_pte_enter(unsigned long flags, 57static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 90beb444e1dd..063a0d2fba30 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
314 H_DABRX_KERNEL | H_DABRX_USER); 314 H_DABRX_KERNEL | H_DABRX_USER);
315} 315}
316 316
317#define CMO_CHARACTERISTICS_TOKEN 44
318#define CMO_MAXLENGTH 1026
319
320/**
321 * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
322 * handle that here. (Stolen from parse_system_parameter_string)
323 */
324void pSeries_cmo_feature_init(void)
325{
326 char *ptr, *key, *value, *end;
327 int call_status;
328 int PrPSP = -1;
329 int SecPSP = -1;
330
331 pr_debug(" -> fw_cmo_feature_init()\n");
332 spin_lock(&rtas_data_buf_lock);
333 memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
334 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
335 NULL,
336 CMO_CHARACTERISTICS_TOKEN,
337 __pa(rtas_data_buf),
338 RTAS_DATA_BUF_SIZE);
339
340 if (call_status != 0) {
341 spin_unlock(&rtas_data_buf_lock);
342 pr_debug("CMO not available\n");
343 pr_debug(" <- fw_cmo_feature_init()\n");
344 return;
345 }
346
347 end = rtas_data_buf + CMO_MAXLENGTH - 2;
348 ptr = rtas_data_buf + 2; /* step over strlen value */
349 key = value = ptr;
350
351 while (*ptr && (ptr <= end)) {
352 /* Separate the key and value by replacing '=' with '\0' and
353 * point the value at the string after the '='
354 */
355 if (ptr[0] == '=') {
356 ptr[0] = '\0';
357 value = ptr + 1;
358 } else if (ptr[0] == '\0' || ptr[0] == ',') {
359 /* Terminate the string containing the key/value pair */
360 ptr[0] = '\0';
361
362 if (key == value) {
363 pr_debug("Malformed key/value pair\n");
364 /* Never found a '=', end processing */
365 break;
366 }
367
368 if (0 == strcmp(key, "PrPSP"))
369 PrPSP = simple_strtol(value, NULL, 10);
370 else if (0 == strcmp(key, "SecPSP"))
371 SecPSP = simple_strtol(value, NULL, 10);
372 value = key = ptr + 1;
373 }
374 ptr++;
375 }
376
377 if (PrPSP != -1 || SecPSP != -1) {
378 pr_info("CMO enabled\n");
379 pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
380 powerpc_firmware_features |= FW_FEATURE_CMO;
381 } else
382 pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
383 spin_unlock(&rtas_data_buf_lock);
384 pr_debug(" <- fw_cmo_feature_init()\n");
385}
386
317/* 387/*
318 * Early initialization. Relocation is on but do not reference unbolted pages 388 * Early initialization. Relocation is on but do not reference unbolted pages
319 */ 389 */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
329 else if (firmware_has_feature(FW_FEATURE_XDABR)) 399 else if (firmware_has_feature(FW_FEATURE_XDABR))
330 ppc_md.set_dabr = pseries_set_xdabr; 400 ppc_md.set_dabr = pseries_set_xdabr;
331 401
402 pSeries_cmo_feature_init();
332 iommu_init_early_pSeries(); 403 iommu_init_early_pSeries();
333 404
334 pr_debug(" <- pSeries_init_early()\n"); 405 pr_debug(" <- pSeries_init_early()\n");
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index de8c8b542cfa..89639ecbf381 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
147 } 147 }
148} 148}
149 149
150static void dart_build(struct iommu_table *tbl, long index, 150static int dart_build(struct iommu_table *tbl, long index,
151 long npages, unsigned long uaddr, 151 long npages, unsigned long uaddr,
152 enum dma_data_direction direction, 152 enum dma_data_direction direction,
153 struct dma_attrs *attrs) 153 struct dma_attrs *attrs)
@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
184 } else { 184 } else {
185 dart_dirty = 1; 185 dart_dirty = 1;
186 } 186 }
187 return 0;
187} 188}
188 189
189 190
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 00527805e4f1..e5a6e2e84540 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -33,6 +33,7 @@
33*/ 33*/
34 34
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/moduleparam.h>
36#include <linux/types.h> 37#include <linux/types.h>
37#include <linux/errno.h> 38#include <linux/errno.h>
38#include <linux/ioport.h> 39#include <linux/ioport.h>
@@ -52,7 +53,9 @@
52#include <asm/hvcall.h> 53#include <asm/hvcall.h>
53#include <asm/atomic.h> 54#include <asm/atomic.h>
54#include <asm/vio.h> 55#include <asm/vio.h>
56#include <asm/iommu.h>
55#include <asm/uaccess.h> 57#include <asm/uaccess.h>
58#include <asm/firmware.h>
56#include <linux/seq_file.h> 59#include <linux/seq_file.h>
57 60
58#include "ibmveth.h" 61#include "ibmveth.h"
@@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
94static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter); 97static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
95static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); 98static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
96static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); 99static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
100static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
97static struct kobj_type ktype_veth_pool; 101static struct kobj_type ktype_veth_pool;
98 102
103
99#ifdef CONFIG_PROC_FS 104#ifdef CONFIG_PROC_FS
100#define IBMVETH_PROC_DIR "ibmveth" 105#define IBMVETH_PROC_DIR "ibmveth"
101static struct proc_dir_entry *ibmveth_proc_dir; 106static struct proc_dir_entry *ibmveth_proc_dir;
@@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
226 u32 i; 231 u32 i;
227 u32 count = pool->size - atomic_read(&pool->available); 232 u32 count = pool->size - atomic_read(&pool->available);
228 u32 buffers_added = 0; 233 u32 buffers_added = 0;
234 struct sk_buff *skb;
235 unsigned int free_index, index;
236 u64 correlator;
237 unsigned long lpar_rc;
238 dma_addr_t dma_addr;
229 239
230 mb(); 240 mb();
231 241
232 for(i = 0; i < count; ++i) { 242 for(i = 0; i < count; ++i) {
233 struct sk_buff *skb;
234 unsigned int free_index, index;
235 u64 correlator;
236 union ibmveth_buf_desc desc; 243 union ibmveth_buf_desc desc;
237 unsigned long lpar_rc;
238 dma_addr_t dma_addr;
239 244
240 skb = alloc_skb(pool->buff_size, GFP_ATOMIC); 245 skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
241 246
@@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
255 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, 260 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
256 pool->buff_size, DMA_FROM_DEVICE); 261 pool->buff_size, DMA_FROM_DEVICE);
257 262
263 if (dma_mapping_error(dma_addr))
264 goto failure;
265
258 pool->free_map[free_index] = IBM_VETH_INVALID_MAP; 266 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
259 pool->dma_addr[index] = dma_addr; 267 pool->dma_addr[index] = dma_addr;
260 pool->skbuff[index] = skb; 268 pool->skbuff[index] = skb;
@@ -267,20 +275,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
267 275
268 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); 276 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
269 277
270 if(lpar_rc != H_SUCCESS) { 278 if (lpar_rc != H_SUCCESS)
271 pool->free_map[free_index] = index; 279 goto failure;
272 pool->skbuff[index] = NULL; 280 else {
273 if (pool->consumer_index == 0)
274 pool->consumer_index = pool->size - 1;
275 else
276 pool->consumer_index--;
277 dma_unmap_single(&adapter->vdev->dev,
278 pool->dma_addr[index], pool->buff_size,
279 DMA_FROM_DEVICE);
280 dev_kfree_skb_any(skb);
281 adapter->replenish_add_buff_failure++;
282 break;
283 } else {
284 buffers_added++; 281 buffers_added++;
285 adapter->replenish_add_buff_success++; 282 adapter->replenish_add_buff_success++;
286 } 283 }
@@ -288,6 +285,24 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
288 285
289 mb(); 286 mb();
290 atomic_add(buffers_added, &(pool->available)); 287 atomic_add(buffers_added, &(pool->available));
288 return;
289
290failure:
291 pool->free_map[free_index] = index;
292 pool->skbuff[index] = NULL;
293 if (pool->consumer_index == 0)
294 pool->consumer_index = pool->size - 1;
295 else
296 pool->consumer_index--;
297 if (!dma_mapping_error(dma_addr))
298 dma_unmap_single(&adapter->vdev->dev,
299 pool->dma_addr[index], pool->buff_size,
300 DMA_FROM_DEVICE);
301 dev_kfree_skb_any(skb);
302 adapter->replenish_add_buff_failure++;
303
304 mb();
305 atomic_add(buffers_added, &(pool->available));
291} 306}
292 307
293/* replenish routine */ 308/* replenish routine */
@@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
297 312
298 adapter->replenish_task_cycles++; 313 adapter->replenish_task_cycles++;
299 314
300 for(i = 0; i < IbmVethNumBufferPools; i++) 315 for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
301 if(adapter->rx_buff_pool[i].active) 316 if(adapter->rx_buff_pool[i].active)
302 ibmveth_replenish_buffer_pool(adapter, 317 ibmveth_replenish_buffer_pool(adapter,
303 &adapter->rx_buff_pool[i]); 318 &adapter->rx_buff_pool[i]);
@@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
472 if (adapter->rx_buff_pool[i].active) 487 if (adapter->rx_buff_pool[i].active)
473 ibmveth_free_buffer_pool(adapter, 488 ibmveth_free_buffer_pool(adapter,
474 &adapter->rx_buff_pool[i]); 489 &adapter->rx_buff_pool[i]);
490
491 if (adapter->bounce_buffer != NULL) {
492 if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
493 dma_unmap_single(&adapter->vdev->dev,
494 adapter->bounce_buffer_dma,
495 adapter->netdev->mtu + IBMVETH_BUFF_OH,
496 DMA_BIDIRECTIONAL);
497 adapter->bounce_buffer_dma = DMA_ERROR_CODE;
498 }
499 kfree(adapter->bounce_buffer);
500 adapter->bounce_buffer = NULL;
501 }
475} 502}
476 503
477static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, 504static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
@@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev)
607 return rc; 634 return rc;
608 } 635 }
609 636
637 adapter->bounce_buffer =
638 kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
639 if (!adapter->bounce_buffer) {
640 ibmveth_error_printk("unable to allocate bounce buffer\n");
641 ibmveth_cleanup(adapter);
642 napi_disable(&adapter->napi);
643 return -ENOMEM;
644 }
645 adapter->bounce_buffer_dma =
646 dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
647 netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
648 if (dma_mapping_error(adapter->bounce_buffer_dma)) {
649 ibmveth_error_printk("unable to map bounce buffer\n");
650 ibmveth_cleanup(adapter);
651 napi_disable(&adapter->napi);
652 return -ENOMEM;
653 }
654
610 ibmveth_debug_printk("initial replenish cycle\n"); 655 ibmveth_debug_printk("initial replenish cycle\n");
611 ibmveth_interrupt(netdev->irq, netdev); 656 ibmveth_interrupt(netdev->irq, netdev);
612 657
@@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
853 unsigned int tx_packets = 0; 898 unsigned int tx_packets = 0;
854 unsigned int tx_send_failed = 0; 899 unsigned int tx_send_failed = 0;
855 unsigned int tx_map_failed = 0; 900 unsigned int tx_map_failed = 0;
901 int used_bounce = 0;
902 unsigned long data_dma_addr;
856 903
857 desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len; 904 desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
858 desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data, 905 data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
859 skb->len, DMA_TO_DEVICE); 906 skb->len, DMA_TO_DEVICE);
860 907
861 if (skb->ip_summed == CHECKSUM_PARTIAL && 908 if (skb->ip_summed == CHECKSUM_PARTIAL &&
862 ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) { 909 ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) {
@@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
875 buf[1] = 0; 922 buf[1] = 0;
876 } 923 }
877 924
878 if (dma_mapping_error(desc.fields.address)) { 925 if (dma_mapping_error(data_dma_addr)) {
879 ibmveth_error_printk("tx: unable to map xmit buffer\n"); 926 if (!firmware_has_feature(FW_FEATURE_CMO))
927 ibmveth_error_printk("tx: unable to map xmit buffer\n");
928 skb_copy_from_linear_data(skb, adapter->bounce_buffer,
929 skb->len);
930 desc.fields.address = adapter->bounce_buffer_dma;
880 tx_map_failed++; 931 tx_map_failed++;
881 tx_dropped++; 932 used_bounce = 1;
882 goto out; 933 } else
883 } 934 desc.fields.address = data_dma_addr;
884 935
885 /* send the frame. Arbitrarily set retrycount to 1024 */ 936 /* send the frame. Arbitrarily set retrycount to 1024 */
886 correlator = 0; 937 correlator = 0;
@@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
904 netdev->trans_start = jiffies; 955 netdev->trans_start = jiffies;
905 } 956 }
906 957
907 dma_unmap_single(&adapter->vdev->dev, desc.fields.address, 958 if (!used_bounce)
908 skb->len, DMA_TO_DEVICE); 959 dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
960 skb->len, DMA_TO_DEVICE);
909 961
910out: spin_lock_irqsave(&adapter->stats_lock, flags); 962out: spin_lock_irqsave(&adapter->stats_lock, flags);
911 netdev->stats.tx_dropped += tx_dropped; 963 netdev->stats.tx_dropped += tx_dropped;
@@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
1053static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) 1105static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1054{ 1106{
1055 struct ibmveth_adapter *adapter = dev->priv; 1107 struct ibmveth_adapter *adapter = dev->priv;
1108 struct vio_dev *viodev = adapter->vdev;
1056 int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; 1109 int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
1057 int reinit = 0; 1110 int i;
1058 int i, rc;
1059 1111
1060 if (new_mtu < IBMVETH_MAX_MTU) 1112 if (new_mtu < IBMVETH_MAX_MTU)
1061 return -EINVAL; 1113 return -EINVAL;
@@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
1067 if (i == IbmVethNumBufferPools) 1119 if (i == IbmVethNumBufferPools)
1068 return -EINVAL; 1120 return -EINVAL;
1069 1121
1122 /* Deactivate all the buffer pools so that the next loop can activate
1123 only the buffer pools necessary to hold the new MTU */
1124 for (i = 0; i < IbmVethNumBufferPools; i++)
1125 if (adapter->rx_buff_pool[i].active) {
1126 ibmveth_free_buffer_pool(adapter,
1127 &adapter->rx_buff_pool[i]);
1128 adapter->rx_buff_pool[i].active = 0;
1129 }
1130
1070 /* Look for an active buffer pool that can hold the new MTU */ 1131 /* Look for an active buffer pool that can hold the new MTU */
1071 for(i = 0; i<IbmVethNumBufferPools; i++) { 1132 for(i = 0; i<IbmVethNumBufferPools; i++) {
1072 if (!adapter->rx_buff_pool[i].active) { 1133 adapter->rx_buff_pool[i].active = 1;
1073 adapter->rx_buff_pool[i].active = 1;
1074 reinit = 1;
1075 }
1076 1134
1077 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { 1135 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
1078 if (reinit && netif_running(adapter->netdev)) { 1136 if (netif_running(adapter->netdev)) {
1079 adapter->pool_config = 1; 1137 adapter->pool_config = 1;
1080 ibmveth_close(adapter->netdev); 1138 ibmveth_close(adapter->netdev);
1081 adapter->pool_config = 0; 1139 adapter->pool_config = 0;
1082 dev->mtu = new_mtu; 1140 dev->mtu = new_mtu;
1083 if ((rc = ibmveth_open(adapter->netdev))) 1141 vio_cmo_set_dev_desired(viodev,
1084 return rc; 1142 ibmveth_get_desired_dma
1085 } else 1143 (viodev));
1086 dev->mtu = new_mtu; 1144 return ibmveth_open(adapter->netdev);
1145 }
1146 dev->mtu = new_mtu;
1147 vio_cmo_set_dev_desired(viodev,
1148 ibmveth_get_desired_dma
1149 (viodev));
1087 return 0; 1150 return 0;
1088 } 1151 }
1089 } 1152 }
@@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev)
1098} 1161}
1099#endif 1162#endif
1100 1163
1164/**
1165 * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
1166 *
1167 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1168 *
1169 * Return value:
1170 * Number of bytes of IO data the driver will need to perform well.
1171 */
1172static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
1173{
1174 struct net_device *netdev = dev_get_drvdata(&vdev->dev);
1175 struct ibmveth_adapter *adapter;
1176 unsigned long ret;
1177 int i;
1178 int rxqentries = 1;
1179
1180 /* netdev inits at probe time along with the structures we need below*/
1181 if (netdev == NULL)
1182 return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
1183
1184 adapter = netdev_priv(netdev);
1185
1186 ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
1187 ret += IOMMU_PAGE_ALIGN(netdev->mtu);
1188
1189 for (i = 0; i < IbmVethNumBufferPools; i++) {
1190 /* add the size of the active receive buffers */
1191 if (adapter->rx_buff_pool[i].active)
1192 ret +=
1193 adapter->rx_buff_pool[i].size *
1194 IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
1195 buff_size);
1196 rxqentries += adapter->rx_buff_pool[i].size;
1197 }
1198 /* add the size of the receive queue entries */
1199 ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
1200
1201 return ret;
1202}
1203
1101static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) 1204static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
1102{ 1205{
1103 int rc, i; 1206 int rc, i;
@@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
1242 ibmveth_proc_unregister_adapter(adapter); 1345 ibmveth_proc_unregister_adapter(adapter);
1243 1346
1244 free_netdev(netdev); 1347 free_netdev(netdev);
1348 dev_set_drvdata(&dev->dev, NULL);
1349
1245 return 0; 1350 return 0;
1246} 1351}
1247 1352
@@ -1402,14 +1507,15 @@ const char * buf, size_t count)
1402 return -EPERM; 1507 return -EPERM;
1403 } 1508 }
1404 1509
1405 pool->active = 0;
1406 if (netif_running(netdev)) { 1510 if (netif_running(netdev)) {
1407 adapter->pool_config = 1; 1511 adapter->pool_config = 1;
1408 ibmveth_close(netdev); 1512 ibmveth_close(netdev);
1513 pool->active = 0;
1409 adapter->pool_config = 0; 1514 adapter->pool_config = 0;
1410 if ((rc = ibmveth_open(netdev))) 1515 if ((rc = ibmveth_open(netdev)))
1411 return rc; 1516 return rc;
1412 } 1517 }
1518 pool->active = 0;
1413 } 1519 }
1414 } else if (attr == &veth_num_attr) { 1520 } else if (attr == &veth_num_attr) {
1415 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) 1521 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
@@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = {
1485 .id_table = ibmveth_device_table, 1591 .id_table = ibmveth_device_table,
1486 .probe = ibmveth_probe, 1592 .probe = ibmveth_probe,
1487 .remove = ibmveth_remove, 1593 .remove = ibmveth_remove,
1594 .get_desired_dma = ibmveth_get_desired_dma,
1488 .driver = { 1595 .driver = {
1489 .name = ibmveth_driver_name, 1596 .name = ibmveth_driver_name,
1490 .owner = THIS_MODULE, 1597 .owner = THIS_MODULE,
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 41f61cd18852..d28186948752 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address,
93 plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) 93 plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
94 94
95#define IbmVethNumBufferPools 5 95#define IbmVethNumBufferPools 5
96#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */
96#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ 97#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
97#define IBMVETH_MAX_MTU 68 98#define IBMVETH_MAX_MTU 68
98#define IBMVETH_MAX_POOL_COUNT 4096 99#define IBMVETH_MAX_POOL_COUNT 4096
100#define IBMVETH_BUFF_LIST_SIZE 4096
101#define IBMVETH_FILT_LIST_SIZE 4096
99#define IBMVETH_MAX_BUF_SIZE (1024 * 128) 102#define IBMVETH_MAX_BUF_SIZE (1024 * 128)
100 103
101static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; 104static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
@@ -143,6 +146,8 @@ struct ibmveth_adapter {
143 struct ibmveth_rx_q rx_queue; 146 struct ibmveth_rx_q rx_queue;
144 int pool_config; 147 int pool_config;
145 int rx_csum; 148 int rx_csum;
149 void *bounce_buffer;
150 dma_addr_t bounce_buffer_dma;
146 151
147 /* adapter specific stats */ 152 /* adapter specific stats */
148 u64 replenish_task_cycles; 153 u64 replenish_task_cycles;
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index 5c015d310d4a..344e1b03dd8b 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
91 } 91 }
92 92
93 info.irq = irq_of_parse_and_map(node, 0); 93 info.irq = irq_of_parse_and_map(node, 0);
94 if (info.irq == NO_IRQ)
95 info.irq = -1;
96 94
97 if (of_find_i2c_driver(node, &info) < 0) { 95 if (of_find_i2c_driver(node, &info) < 0) {
98 irq_dispose_mapping(info.irq); 96 irq_dispose_mapping(info.irq);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index eb702b96d57c..c4a7c06793c5 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev)
3819 return 0; 3819 return 0;
3820} 3820}
3821 3821
3822/**
3823 * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
3824 * @vdev: vio device struct
3825 *
3826 * Return value:
3827 * Number of bytes the driver will need to DMA map at the same time in
3828 * order to perform well.
3829 */
3830static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
3831{
3832 unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
3833 return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
3834}
3835
3822static struct vio_device_id ibmvfc_device_table[] __devinitdata = { 3836static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
3823 {"fcp", "IBM,vfc-client"}, 3837 {"fcp", "IBM,vfc-client"},
3824 { "", "" } 3838 { "", "" }
@@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = {
3829 .id_table = ibmvfc_device_table, 3843 .id_table = ibmvfc_device_table,
3830 .probe = ibmvfc_probe, 3844 .probe = ibmvfc_probe,
3831 .remove = ibmvfc_remove, 3845 .remove = ibmvfc_remove,
3846 .get_desired_dma = ibmvfc_get_desired_dma,
3832 .driver = { 3847 .driver = {
3833 .name = IBMVFC_NAME, 3848 .name = IBMVFC_NAME,
3834 .owner = THIS_MODULE, 3849 .owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 5d23368a1bce..20000ec79b04 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -72,6 +72,7 @@
72#include <linux/delay.h> 72#include <linux/delay.h>
73#include <asm/firmware.h> 73#include <asm/firmware.h>
74#include <asm/vio.h> 74#include <asm/vio.h>
75#include <asm/firmware.h>
75#include <scsi/scsi.h> 76#include <scsi/scsi.h>
76#include <scsi/scsi_cmnd.h> 77#include <scsi/scsi_cmnd.h>
77#include <scsi/scsi_host.h> 78#include <scsi/scsi_host.h>
@@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
426 SG_ALL * sizeof(struct srp_direct_buf), 427 SG_ALL * sizeof(struct srp_direct_buf),
427 &evt_struct->ext_list_token, 0); 428 &evt_struct->ext_list_token, 0);
428 if (!evt_struct->ext_list) { 429 if (!evt_struct->ext_list) {
429 sdev_printk(KERN_ERR, cmd->device, 430 if (!firmware_has_feature(FW_FEATURE_CMO))
430 "Can't allocate memory for indirect table\n"); 431 sdev_printk(KERN_ERR, cmd->device,
432 "Can't allocate memory "
433 "for indirect table\n");
431 return 0; 434 return 0;
432 } 435 }
433 } 436 }
@@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
743 srp_cmd->lun = ((u64) lun) << 48; 746 srp_cmd->lun = ((u64) lun) << 48;
744 747
745 if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { 748 if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
746 sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n"); 749 if (!firmware_has_feature(FW_FEATURE_CMO))
750 sdev_printk(KERN_ERR, cmnd->device,
751 "couldn't convert cmd to srp_cmd\n");
747 free_event_struct(&hostdata->pool, evt_struct); 752 free_event_struct(&hostdata->pool, evt_struct);
748 return SCSI_MLQUEUE_HOST_BUSY; 753 return SCSI_MLQUEUE_HOST_BUSY;
749 } 754 }
@@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
855 DMA_BIDIRECTIONAL); 860 DMA_BIDIRECTIONAL);
856 861
857 if (dma_mapping_error(req->buffer)) { 862 if (dma_mapping_error(req->buffer)) {
858 dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n"); 863 if (!firmware_has_feature(FW_FEATURE_CMO))
864 dev_err(hostdata->dev,
865 "Unable to map request_buffer for "
866 "adapter_info!\n");
859 free_event_struct(&hostdata->pool, evt_struct); 867 free_event_struct(&hostdata->pool, evt_struct);
860 return; 868 return;
861 } 869 }
@@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
1400 DMA_BIDIRECTIONAL); 1408 DMA_BIDIRECTIONAL);
1401 1409
1402 if (dma_mapping_error(host_config->buffer)) { 1410 if (dma_mapping_error(host_config->buffer)) {
1403 dev_err(hostdata->dev, "dma_mapping error getting host config\n"); 1411 if (!firmware_has_feature(FW_FEATURE_CMO))
1412 dev_err(hostdata->dev,
1413 "dma_mapping error getting host config\n");
1404 free_event_struct(&hostdata->pool, evt_struct); 1414 free_event_struct(&hostdata->pool, evt_struct);
1405 return -1; 1415 return -1;
1406 } 1416 }
@@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = {
1604 .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler, 1614 .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
1605 .slave_configure = ibmvscsi_slave_configure, 1615 .slave_configure = ibmvscsi_slave_configure,
1606 .change_queue_depth = ibmvscsi_change_queue_depth, 1616 .change_queue_depth = ibmvscsi_change_queue_depth,
1607 .cmd_per_lun = 16, 1617 .cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
1608 .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, 1618 .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
1609 .this_id = -1, 1619 .this_id = -1,
1610 .sg_tablesize = SG_ALL, 1620 .sg_tablesize = SG_ALL,
@@ -1613,6 +1623,26 @@ static struct scsi_host_template driver_template = {
1613}; 1623};
1614 1624
1615/** 1625/**
1626 * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
1627 *
1628 * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
1629 *
1630 * Return value:
1631 * Number of bytes of IO data the driver will need to perform well.
1632 */
1633static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
1634{
1635 /* iu_storage data allocated in initialize_event_pool */
1636 unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
1637
1638 /* add io space for sg data */
1639 desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT *
1640 IBMVSCSI_CMDS_PER_LUN_DEFAULT);
1641
1642 return desired_io;
1643}
1644
1645/**
1616 * Called by bus code for each adapter 1646 * Called by bus code for each adapter
1617 */ 1647 */
1618static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) 1648static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
@@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1641 hostdata->host = host; 1671 hostdata->host = host;
1642 hostdata->dev = dev; 1672 hostdata->dev = dev;
1643 atomic_set(&hostdata->request_limit, -1); 1673 atomic_set(&hostdata->request_limit, -1);
1644 hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */ 1674 hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
1645 1675
1646 rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests); 1676 rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
1647 if (rc != 0 && rc != H_RESOURCE) { 1677 if (rc != 0 && rc != H_RESOURCE) {
@@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = {
1735 .id_table = ibmvscsi_device_table, 1765 .id_table = ibmvscsi_device_table,
1736 .probe = ibmvscsi_probe, 1766 .probe = ibmvscsi_probe,
1737 .remove = ibmvscsi_remove, 1767 .remove = ibmvscsi_remove,
1768 .get_desired_dma = ibmvscsi_get_desired_dma,
1738 .driver = { 1769 .driver = {
1739 .name = "ibmvscsi", 1770 .name = "ibmvscsi",
1740 .owner = THIS_MODULE, 1771 .owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h
index 46e850e302c7..2d4339d5e16e 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -45,6 +45,8 @@ struct Scsi_Host;
45#define MAX_INDIRECT_BUFS 10 45#define MAX_INDIRECT_BUFS 10
46 46
47#define IBMVSCSI_MAX_REQUESTS_DEFAULT 100 47#define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
48#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
49#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
48#define IBMVSCSI_MAX_CMDS_PER_LUN 64 50#define IBMVSCSI_MAX_CMDS_PER_LUN 64
49 51
50/* ------------------------------------------------------------ 52/* ------------------------------------------------------------
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 88d180306cf9..3b6ff854d983 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132#endif 132#endif
133 133
134#ifndef ELF_BASE_PLATFORM
135/*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140#define ELF_BASE_PLATFORM NULL
141#endif
142
134static int 143static int
135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 unsigned long load_addr, unsigned long interp_load_addr) 145 unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
142 elf_addr_t __user *envp; 151 elf_addr_t __user *envp;
143 elf_addr_t __user *sp; 152 elf_addr_t __user *sp;
144 elf_addr_t __user *u_platform; 153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform;
145 const char *k_platform = ELF_PLATFORM; 155 const char *k_platform = ELF_PLATFORM;
156 const char *k_base_platform = ELF_BASE_PLATFORM;
146 int items; 157 int items;
147 elf_addr_t *elf_info; 158 elf_addr_t *elf_info;
148 int ei_index = 0; 159 int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
172 return -EFAULT; 183 return -EFAULT;
173 } 184 }
174 185
186 /*
187 * If this architecture has a "base" platform capability
188 * string, copy it to userspace.
189 */
190 u_base_platform = NULL;
191 if (k_base_platform) {
192 size_t len = strlen(k_base_platform) + 1;
193
194 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 if (__copy_to_user(u_base_platform, k_base_platform, len))
196 return -EFAULT;
197 }
198
175 /* Create the ELF interpreter info */ 199 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv; 200 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 201 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
209 NEW_AUX_ENT(AT_PLATFORM, 233 NEW_AUX_ENT(AT_PLATFORM,
210 (elf_addr_t)(unsigned long)u_platform); 234 (elf_addr_t)(unsigned long)u_platform);
211 } 235 }
236 if (k_base_platform) {
237 NEW_AUX_ENT(AT_BASE_PLATFORM,
238 (elf_addr_t)(unsigned long)u_base_platform);
239 }
212 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { 240 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
213 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); 241 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
214 } 242 }
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 2a3e9075a5a0..ef8a248dfd55 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
127extern void do_feature_fixups(unsigned long value, void *fixup_start, 127extern void do_feature_fixups(unsigned long value, void *fixup_start,
128 void *fixup_end); 128 void *fixup_end);
129 129
130extern const char *powerpc_base_platform;
131
130#endif /* __ASSEMBLY__ */ 132#endif /* __ASSEMBLY__ */
131 133
132/* CPU kernel features */ 134/* CPU kernel features */
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h
index 89664675b469..80d1f399ee51 100644
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t;
217 217
218#define ELF_PLATFORM (cur_cpu_spec->platform) 218#define ELF_PLATFORM (cur_cpu_spec->platform)
219 219
220/* While ELF_PLATFORM indicates the ISA supported by the platform, it
221 * may not accurately reflect the underlying behavior of the hardware
222 * (as in the case of running in Power5+ compatibility mode on a
223 * Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries
224 * that are tuned for the real hardware.
225 */
226#define ELF_BASE_PLATFORM (powerpc_base_platform)
227
220#ifdef __powerpc64__ 228#ifdef __powerpc64__
221# define ELF_PLAT_INIT(_r, load_addr) do { \ 229# define ELF_PLAT_INIT(_r, load_addr) do { \
222 _r->gpr[2] = load_addr; \ 230 _r->gpr[2] = load_addr; \
diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h
index ef328995ba9d..3a179827528d 100644
--- a/include/asm-powerpc/firmware.h
+++ b/include/asm-powerpc/firmware.h
@@ -46,6 +46,7 @@
46#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) 46#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
47#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) 47#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
48#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000) 48#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000)
49#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000)
49 50
50#ifndef __ASSEMBLY__ 51#ifndef __ASSEMBLY__
51 52
@@ -58,7 +59,7 @@ enum {
58 FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | 59 FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
59 FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | 60 FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
60 FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | 61 FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
61 FW_FEATURE_SPLPAR | FW_FEATURE_LPAR, 62 FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
62 FW_FEATURE_PSERIES_ALWAYS = 0, 63 FW_FEATURE_PSERIES_ALWAYS = 0,
63 FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, 64 FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
64 FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, 65 FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index bf6cd7cb996c..fbe2932fa9e9 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -92,6 +92,11 @@
92#define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */ 92#define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */
93#define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */ 93#define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */
94#define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */ 94#define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */
95#define H_PAGE_STATE_CHANGE (1UL<<(63-28))
96#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30)))
97#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
98#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31)))
99#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
95#define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */ 100#define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */
96#define H_ANDCOND (1UL<<(63-33)) 101#define H_ANDCOND (1UL<<(63-33))
97#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ 102#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */
@@ -210,7 +215,9 @@
210#define H_JOIN 0x298 215#define H_JOIN 0x298
211#define H_VASI_STATE 0x2A4 216#define H_VASI_STATE 0x2A4
212#define H_ENABLE_CRQ 0x2B0 217#define H_ENABLE_CRQ 0x2B0
213#define MAX_HCALL_OPCODE H_ENABLE_CRQ 218#define H_SET_MPP 0x2D0
219#define H_GET_MPP 0x2D4
220#define MAX_HCALL_OPCODE H_GET_MPP
214 221
215#ifndef __ASSEMBLY__ 222#ifndef __ASSEMBLY__
216 223
@@ -270,6 +277,20 @@ struct hcall_stats {
270}; 277};
271#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) 278#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
272 279
280struct hvcall_mpp_data {
281 unsigned long entitled_mem;
282 unsigned long mapped_mem;
283 unsigned short group_num;
284 unsigned short pool_num;
285 unsigned char mem_weight;
286 unsigned char unallocated_mem_weight;
287 unsigned long unallocated_entitlement; /* value in bytes */
288 unsigned long pool_size;
289 signed long loan_request;
290 unsigned long backing_mem;
291};
292
293int h_get_mpp(struct hvcall_mpp_data *);
273#endif /* __ASSEMBLY__ */ 294#endif /* __ASSEMBLY__ */
274#endif /* __KERNEL__ */ 295#endif /* __KERNEL__ */
275#endif /* _ASM_POWERPC_HVCALL_H */ 296#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 567ed92cd91f..2fe268b10333 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -125,7 +125,10 @@ struct lppaca {
125 // NOTE: This value will ALWAYS be zero for dedicated processors and 125 // NOTE: This value will ALWAYS be zero for dedicated processors and
126 // will NEVER be zero for shared processors (ie, initialized to a 1). 126 // will NEVER be zero for shared processors (ie, initialized to a 1).
127 volatile u32 yield_count; // PLIC increments each dispatchx00-x03 127 volatile u32 yield_count; // PLIC increments each dispatchx00-x03
128 u8 reserved6[124]; // Reserved x04-x7F 128 u32 reserved6;
129 volatile u64 cmo_faults; // CMO page fault count x08-x0F
130 volatile u64 cmo_fault_time; // CMO page fault time x10-x17
131 u8 reserved7[104]; // Reserved x18-x7F
129 132
130//============================================================================= 133//=============================================================================
131// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data 134// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 1233d735fd28..893aafd87fde 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
76 * destroyed as well */ 76 * destroyed as well */
77 void (*hpte_clear_all)(void); 77 void (*hpte_clear_all)(void);
78 78
79 void (*tce_build)(struct iommu_table * tbl, 79 int (*tce_build)(struct iommu_table *tbl,
80 long index, 80 long index,
81 long npages, 81 long npages,
82 unsigned long uaddr, 82 unsigned long uaddr,
diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h
index 710c5d36efaa..8917ed630565 100644
--- a/include/asm-powerpc/mpc52xx_psc.h
+++ b/include/asm-powerpc/mpc52xx_psc.h
@@ -60,10 +60,12 @@
60#define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002 60#define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002
61#define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 61#define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001
62 62
63/* PSC interrupt mask bits */ 63/* PSC interrupt status/mask bits */
64#define MPC52xx_PSC_IMR_TXRDY 0x0100 64#define MPC52xx_PSC_IMR_TXRDY 0x0100
65#define MPC52xx_PSC_IMR_RXRDY 0x0200 65#define MPC52xx_PSC_IMR_RXRDY 0x0200
66#define MPC52xx_PSC_IMR_DB 0x0400 66#define MPC52xx_PSC_IMR_DB 0x0400
67#define MPC52xx_PSC_IMR_TXEMP 0x0800
68#define MPC52xx_PSC_IMR_ORERR 0x1000
67#define MPC52xx_PSC_IMR_IPC 0x8000 69#define MPC52xx_PSC_IMR_IPC 0x8000
68 70
69/* PSC input port change bit */ 71/* PSC input port change bit */
@@ -92,6 +94,34 @@
92 94
93#define MPC52xx_PSC_RFNUM_MASK 0x01ff 95#define MPC52xx_PSC_RFNUM_MASK 0x01ff
94 96
97#define MPC52xx_PSC_SICR_DTS1 (1 << 29)
98#define MPC52xx_PSC_SICR_SHDR (1 << 28)
99#define MPC52xx_PSC_SICR_SIM_MASK (0xf << 24)
100#define MPC52xx_PSC_SICR_SIM_UART (0x0 << 24)
101#define MPC52xx_PSC_SICR_SIM_UART_DCD (0x8 << 24)
102#define MPC52xx_PSC_SICR_SIM_CODEC_8 (0x1 << 24)
103#define MPC52xx_PSC_SICR_SIM_CODEC_16 (0x2 << 24)
104#define MPC52xx_PSC_SICR_SIM_AC97 (0x3 << 24)
105#define MPC52xx_PSC_SICR_SIM_SIR (0x8 << 24)
106#define MPC52xx_PSC_SICR_SIM_SIR_DCD (0xc << 24)
107#define MPC52xx_PSC_SICR_SIM_MIR (0x5 << 24)
108#define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24)
109#define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24)
110#define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24)
111#define MPC52xx_PSC_SICR_GENCLK (1 << 23)
112#define MPC52xx_PSC_SICR_I2S (1 << 22)
113#define MPC52xx_PSC_SICR_CLKPOL (1 << 21)
114#define MPC52xx_PSC_SICR_SYNCPOL (1 << 20)
115#define MPC52xx_PSC_SICR_CELLSLAVE (1 << 19)
116#define MPC52xx_PSC_SICR_CELL2XCLK (1 << 18)
117#define MPC52xx_PSC_SICR_ESAI (1 << 17)
118#define MPC52xx_PSC_SICR_ENAC97 (1 << 16)
119#define MPC52xx_PSC_SICR_SPI (1 << 15)
120#define MPC52xx_PSC_SICR_MSTR (1 << 14)
121#define MPC52xx_PSC_SICR_CPOL (1 << 13)
122#define MPC52xx_PSC_SICR_CPHA (1 << 12)
123#define MPC52xx_PSC_SICR_USEEOF (1 << 11)
124#define MPC52xx_PSC_SICR_DISABLEEOF (1 << 10)
95 125
96/* Structure of the hardware registers */ 126/* Structure of the hardware registers */
97struct mpc52xx_psc { 127struct mpc52xx_psc {
@@ -132,8 +162,12 @@ struct mpc52xx_psc {
132 u8 reserved5[3]; 162 u8 reserved5[3];
133 u8 ctlr; /* PSC + 0x1c */ 163 u8 ctlr; /* PSC + 0x1c */
134 u8 reserved6[3]; 164 u8 reserved6[3];
135 u16 ccr; /* PSC + 0x20 */ 165 /* BitClkDiv field of CCR is byte swapped in
136 u8 reserved7[14]; 166 * the hardware for mpc5200/b compatibility */
167 u32 ccr; /* PSC + 0x20 */
168 u32 ac97_slots; /* PSC + 0x24 */
169 u32 ac97_cmd; /* PSC + 0x28 */
170 u32 ac97_data; /* PSC + 0x2c */
137 u8 ivr; /* PSC + 0x30 */ 171 u8 ivr; /* PSC + 0x30 */
138 u8 reserved8[3]; 172 u8 reserved8[3];
139 u8 ip; /* PSC + 0x34 */ 173 u8 ip; /* PSC + 0x34 */
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index d18ffe7bc7c4..dbb8ca172e44 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -38,6 +38,19 @@ extern void paging_init(void);
38 remap_pfn_range(vma, vaddr, pfn, size, prot) 38 remap_pfn_range(vma, vaddr, pfn, size, prot)
39 39
40#include <asm-generic/pgtable.h> 40#include <asm-generic/pgtable.h>
41
42
43/*
44 * This gets called at the end of handling a page fault, when
45 * the kernel has put a new PTE into the page table for the process.
46 * We use it to ensure coherency between the i-cache and d-cache
47 * for the page which has just been mapped in.
48 * On machines which use an MMU hash table, we use this to put a
49 * corresponding HPTE into the hash table ahead of time, instead of
50 * waiting for the inevitable extra hash-table miss exception.
51 */
52extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
53
41#endif /* __ASSEMBLY__ */ 54#endif /* __ASSEMBLY__ */
42 55
43#endif /* __KERNEL__ */ 56#endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h
index 2b8a458f990a..eb8eb400c664 100644
--- a/include/asm-powerpc/syscalls.h
+++ b/include/asm-powerpc/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
31 unsigned long p3, unsigned long p4, unsigned long p5, 31 unsigned long p3, unsigned long p4, unsigned long p5,
32 unsigned long p6, struct pt_regs *regs); 32 unsigned long p6, struct pt_regs *regs);
33asmlinkage long sys_pipe(int __user *fildes); 33asmlinkage long sys_pipe(int __user *fildes);
34asmlinkage long sys_pipe2(int __user *fildes, int flags);
34asmlinkage long sys_rt_sigaction(int sig, 35asmlinkage long sys_rt_sigaction(int sig,
35 const struct sigaction __user *act, 36 const struct sigaction __user *act,
36 struct sigaction __user *oact, size_t sigsetsize); 37 struct sigaction __user *oact, size_t sigsetsize);
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index ae7085c65692..e084272ed1c2 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -316,3 +316,9 @@ COMPAT_SYS(fallocate)
316SYSCALL(subpage_prot) 316SYSCALL(subpage_prot)
317COMPAT_SYS_SPU(timerfd_settime) 317COMPAT_SYS_SPU(timerfd_settime)
318COMPAT_SYS_SPU(timerfd_gettime) 318COMPAT_SYS_SPU(timerfd_gettime)
319COMPAT_SYS_SPU(signalfd4)
320SYSCALL_SPU(eventfd2)
321SYSCALL_SPU(epoll_create1)
322SYSCALL_SPU(dup3)
323SYSCALL_SPU(pipe2)
324SYSCALL(inotify_init1)
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index e6e25e2364eb..d6648c143322 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
110#endif 110#endif
111 111
112extern int set_dabr(unsigned long dabr); 112extern int set_dabr(unsigned long dabr);
113extern void do_dabr(struct pt_regs *regs, unsigned long address,
114 unsigned long error_code);
113extern void print_backtrace(unsigned long *); 115extern void print_backtrace(unsigned long *);
114extern void show_regs(struct pt_regs * regs); 116extern void show_regs(struct pt_regs * regs);
115extern void flush_instruction_cache(void); 117extern void flush_instruction_cache(void);
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index 5c9108147644..361cd5c7a32b 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
162 162
163#endif 163#endif
164 164
165/*
166 * This gets called at the end of handling a page fault, when
167 * the kernel has put a new PTE into the page table for the process.
168 * We use it to ensure coherency between the i-cache and d-cache
169 * for the page which has just been mapped in.
170 * On machines which use an MMU hash table, we use this to put a
171 * corresponding HPTE into the hash table ahead of time, instead of
172 * waiting for the inevitable extra hash-table miss exception.
173 */
174extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
175
176#endif /*__KERNEL__ */ 165#endif /*__KERNEL__ */
177#endif /* _ASM_POWERPC_TLBFLUSH_H */ 166#endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index ce91bb662063..e07d0c76ed77 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -335,10 +335,16 @@
335#define __NR_subpage_prot 310 335#define __NR_subpage_prot 310
336#define __NR_timerfd_settime 311 336#define __NR_timerfd_settime 311
337#define __NR_timerfd_gettime 312 337#define __NR_timerfd_gettime 312
338#define __NR_signalfd4 313
339#define __NR_eventfd2 314
340#define __NR_epoll_create1 315
341#define __NR_dup3 316
342#define __NR_pipe2 317
343#define __NR_inotify_init1 318
338 344
339#ifdef __KERNEL__ 345#ifdef __KERNEL__
340 346
341#define __NR_syscalls 313 347#define __NR_syscalls 319
342 348
343#define __NR__exit __NR_exit 349#define __NR__exit __NR_exit
344#define NR_syscalls __NR_syscalls 350#define NR_syscalls __NR_syscalls
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h
index 56512a968dab..0a290a195946 100644
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -39,16 +39,32 @@
39#define VIO_IRQ_DISABLE 0UL 39#define VIO_IRQ_DISABLE 0UL
40#define VIO_IRQ_ENABLE 1UL 40#define VIO_IRQ_ENABLE 1UL
41 41
42/*
43 * VIO CMO minimum entitlement for all devices and spare entitlement
44 */
45#define VIO_CMO_MIN_ENT 1562624
46
42struct iommu_table; 47struct iommu_table;
43 48
44/* 49/**
45 * The vio_dev structure is used to describe virtual I/O devices. 50 * vio_dev - This structure is used to describe virtual I/O devices.
51 *
52 * @desired: set from return of driver's get_desired_dma() function
53 * @entitled: bytes of IO data that has been reserved for this device.
54 * @allocated: bytes of IO data currently in use by the device.
55 * @allocs_failed: number of DMA failures due to insufficient entitlement.
46 */ 56 */
47struct vio_dev { 57struct vio_dev {
48 const char *name; 58 const char *name;
49 const char *type; 59 const char *type;
50 uint32_t unit_address; 60 uint32_t unit_address;
51 unsigned int irq; 61 unsigned int irq;
62 struct {
63 size_t desired;
64 size_t entitled;
65 size_t allocated;
66 atomic_t allocs_failed;
67 } cmo;
52 struct device dev; 68 struct device dev;
53}; 69};
54 70
@@ -56,12 +72,19 @@ struct vio_driver {
56 const struct vio_device_id *id_table; 72 const struct vio_device_id *id_table;
57 int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); 73 int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
58 int (*remove)(struct vio_dev *dev); 74 int (*remove)(struct vio_dev *dev);
75 /* A driver must have a get_desired_dma() function to
76 * be loaded in a CMO environment if it uses DMA.
77 */
78 unsigned long (*get_desired_dma)(struct vio_dev *dev);
59 struct device_driver driver; 79 struct device_driver driver;
60}; 80};
61 81
62extern int vio_register_driver(struct vio_driver *drv); 82extern int vio_register_driver(struct vio_driver *drv);
63extern void vio_unregister_driver(struct vio_driver *drv); 83extern void vio_unregister_driver(struct vio_driver *drv);
64 84
85extern int vio_cmo_entitlement_update(size_t);
86extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
87
65extern void __devinit vio_unregister_device(struct vio_dev *dev); 88extern void __devinit vio_unregister_device(struct vio_dev *dev);
66 89
67struct device_node; 90struct device_node;
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index 0da17d14fd13..d7afa9dd6635 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -26,9 +26,13 @@
26 26
27#define AT_SECURE 23 /* secure mode boolean */ 27#define AT_SECURE 23 /* secure mode boolean */
28 28
29#define AT_BASE_PLATFORM 24 /* string identifying real platform, may
30 * differ from AT_PLATFORM. */
31
29#define AT_EXECFN 31 /* filename of program */ 32#define AT_EXECFN 31 /* filename of program */
33
30#ifdef __KERNEL__ 34#ifdef __KERNEL__
31#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */ 35#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
32 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ 36 /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
33#endif 37#endif
34 38