about | summary | refs | log | tree | commit | diff | stats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-25 14:08:17 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-25 14:08:17 -0400
commit: 5047887caf1806f31652210df27fb62a7c43f27d (patch)
tree: 4098ead40c1aa7b904167f67cff87a247cfa0b6c /arch/powerpc/platforms
parent: 996abf053eec4d67136be8b911bbaaf989cfb99c (diff)
parent: 973b7d83ebeb1e34b8bee69208916e5f0e2353c3 (diff)
Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (34 commits)
  powerpc: Wireup new syscalls
  Move update_mmu_cache() declaration from tlbflush.h to pgtable.h
  powerpc/pseries: Remove kmalloc call in handling writes to lparcfg
  powerpc/pseries: Update arch vector to indicate support for CMO
  ibmvfc: Add support for collaborative memory overcommit
  ibmvscsi: driver enablement for CMO
  ibmveth: enable driver for CMO
  ibmveth: Automatically enable larger rx buffer pools for larger mtu
  powerpc/pseries: Verify CMO memory entitlement updates with virtual I/O
  powerpc/pseries: vio bus support for CMO
  powerpc/pseries: iommu enablement for CMO
  powerpc/pseries: Add CMO paging statistics
  powerpc/pseries: Add collaborative memory manager
  powerpc/pseries: Utilities to set firmware page state
  powerpc/pseries: Enable CMO feature during platform setup
  powerpc/pseries: Split retrieval of processor entitlement data into a helper routine
  powerpc/pseries: Add memory entitlement capabilities to /proc/ppc64/lparcfg
  powerpc/pseries: Split processor entitlement retrieval and gathering to helper routines
  powerpc/pseries: Remove extraneous error reporting for hcall failures in lparcfg
  powerpc: Fix compile error with binutils 2.15
  ...

Fixed up conflict in arch/powerpc/platforms/52xx/Kconfig manually.
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- arch/powerpc/platforms/52xx/Kconfig 2
-rw-r--r-- arch/powerpc/platforms/cell/iommu.c 16
-rw-r--r-- arch/powerpc/platforms/cell/spufs/sched.c 35
-rw-r--r-- arch/powerpc/platforms/cell/spufs/sputrace.c 3
-rw-r--r-- arch/powerpc/platforms/iseries/iommu.c 3
-rw-r--r-- arch/powerpc/platforms/pasemi/iommu.c 3
-rw-r--r-- arch/powerpc/platforms/pseries/Kconfig 23
-rw-r--r-- arch/powerpc/platforms/pseries/Makefile 1
-rw-r--r-- arch/powerpc/platforms/pseries/cmm.c 468
-rw-r--r-- arch/powerpc/platforms/pseries/iommu.c 42
-rw-r--r-- arch/powerpc/platforms/pseries/plpar_wrappers.h 10
-rw-r--r-- arch/powerpc/platforms/pseries/setup.c 71
12 files changed, 648 insertions, 29 deletions
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index ccbd4958412e..696a5ee4962d 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
1config PPC_MPC52xx 1config PPC_MPC52xx
2 bool "52xx-based boards" 2 bool "52xx-based boards"
3 depends on PPC_MULTIPLATFORM && PPC32 3 depends on PPC_MULTIPLATFORM && PPC32
4 select FSL_SOC
5 select PPC_CLOCK 4 select PPC_CLOCK
6 select PPC_PCI_CHOICE 5 select PPC_PCI_CHOICE
7 6
@@ -49,5 +48,6 @@ config PPC_MPC5200_GPIO
49 bool "MPC5200 GPIO support" 48 bool "MPC5200 GPIO support"
50 depends on PPC_MPC52xx 49 depends on PPC_MPC52xx
51 select ARCH_REQUIRE_GPIOLIB 50 select ARCH_REQUIRE_GPIOLIB
51 select GENERIC_GPIO
52 help 52 help
53 Enable gpiolib support for mpc5200 based boards 53 Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 208005ca262c..e06420af5fe9 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
172 } 172 }
173} 173}
174 174
175static void tce_build_cell(struct iommu_table *tbl, long index, long npages, 175static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
176 unsigned long uaddr, enum dma_data_direction direction, 176 unsigned long uaddr, enum dma_data_direction direction,
177 struct dma_attrs *attrs) 177 struct dma_attrs *attrs)
178{ 178{
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
213 213
214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", 214 pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
215 index, npages, direction, base_pte); 215 index, npages, direction, base_pte);
216 return 0;
216} 217}
217 218
218static void tce_free_cell(struct iommu_table *tbl, long index, long npages) 219static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
1150 1151
1151static int __init setup_iommu_fixed(char *str) 1152static int __init setup_iommu_fixed(char *str)
1152{ 1153{
1154 struct device_node *pciep;
1155
1153 if (strcmp(str, "off") == 0) 1156 if (strcmp(str, "off") == 0)
1154 iommu_fixed_disabled = 1; 1157 iommu_fixed_disabled = 1;
1155 1158
1156 else if (strcmp(str, "weak") == 0) 1159 /* If we can find a pcie-endpoint in the device tree assume that
1160 * we're on a triblade or a CAB so by default the fixed mapping
1161 * should be set to be weakly ordered; but only if the boot
1162 * option WASN'T set for strong ordering
1163 */
1164 pciep = of_find_node_by_type(NULL, "pcie-endpoint");
1165
1166 if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
1157 iommu_fixed_is_weak = 1; 1167 iommu_fixed_is_weak = 1;
1158 1168
1169 of_node_put(pciep);
1170
1159 return 1; 1171 return 1;
1160} 1172}
1161__setup("iommu_fixed=", setup_iommu_fixed); 1173__setup("iommu_fixed=", setup_iommu_fixed);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 34654743363d..2deeeba7eccf 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,11 +312,28 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
312 */ 312 */
313 node = cpu_to_node(raw_smp_processor_id()); 313 node = cpu_to_node(raw_smp_processor_id());
314 for (n = 0; n < MAX_NUMNODES; n++, node++) { 314 for (n = 0; n < MAX_NUMNODES; n++, node++) {
315 int available_spus;
316
315 node = (node < MAX_NUMNODES) ? node : 0; 317 node = (node < MAX_NUMNODES) ? node : 0;
316 if (!node_allowed(ctx, node)) 318 if (!node_allowed(ctx, node))
317 continue; 319 continue;
320
321 available_spus = 0;
318 mutex_lock(&cbe_spu_info[node].list_mutex); 322 mutex_lock(&cbe_spu_info[node].list_mutex);
319 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 323 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
324 if (spu->ctx && spu->ctx->gang
325 && spu->ctx->aff_offset == 0)
326 available_spus -=
327 (spu->ctx->gang->contexts - 1);
328 else
329 available_spus++;
330 }
331 if (available_spus < ctx->gang->contexts) {
332 mutex_unlock(&cbe_spu_info[node].list_mutex);
333 continue;
334 }
335
336 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
320 if ((!mem_aff || spu->has_mem_affinity) && 337 if ((!mem_aff || spu->has_mem_affinity) &&
321 sched_spu(spu)) { 338 sched_spu(spu)) {
322 mutex_unlock(&cbe_spu_info[node].list_mutex); 339 mutex_unlock(&cbe_spu_info[node].list_mutex);
@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
389 if (list_empty(&ctx->aff_list)) 406 if (list_empty(&ctx->aff_list))
390 return 0; 407 return 0;
391 408
409 if (atomic_read(&ctx->gang->aff_sched_count) == 0)
410 ctx->gang->aff_ref_spu = NULL;
411
392 if (!gang->aff_ref_spu) { 412 if (!gang->aff_ref_spu) {
393 if (!(gang->aff_flags & AFF_MERGED)) 413 if (!(gang->aff_flags & AFF_MERGED))
394 aff_merge_remaining_ctxs(gang); 414 aff_merge_remaining_ctxs(gang);
@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
416 if (spu->ctx->flags & SPU_CREATE_NOSCHED) 436 if (spu->ctx->flags & SPU_CREATE_NOSCHED)
417 atomic_dec(&cbe_spu_info[spu->node].reserved_spus); 437 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
418 438
419 if (ctx->gang){ 439 if (ctx->gang)
420 mutex_lock(&ctx->gang->aff_mutex); 440 atomic_dec_if_positive(&ctx->gang->aff_sched_count);
421 if (has_affinity(ctx)) {
422 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
423 ctx->gang->aff_ref_spu = NULL;
424 }
425 mutex_unlock(&ctx->gang->aff_mutex);
426 }
427 441
428 spu_switch_notify(spu, NULL); 442 spu_switch_notify(spu, NULL);
429 spu_unmap_mappings(ctx); 443 spu_unmap_mappings(ctx);
@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
562 goto found; 576 goto found;
563 mutex_unlock(&cbe_spu_info[node].list_mutex); 577 mutex_unlock(&cbe_spu_info[node].list_mutex);
564 578
565 mutex_lock(&ctx->gang->aff_mutex); 579 atomic_dec(&ctx->gang->aff_sched_count);
566 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
567 ctx->gang->aff_ref_spu = NULL;
568 mutex_unlock(&ctx->gang->aff_mutex);
569 goto not_found; 580 goto not_found;
570 } 581 }
571 mutex_unlock(&ctx->gang->aff_mutex); 582 mutex_unlock(&ctx->gang->aff_mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 8c0e95766a62..92d20e993ede 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
196 struct proc_dir_entry *entry; 196 struct proc_dir_entry *entry;
197 int i, error = -ENOMEM; 197 int i, error = -ENOMEM;
198 198
199 sputrace_log = kcalloc(sizeof(struct sputrace), 199 sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
200 bufsize, GFP_KERNEL);
201 if (!sputrace_log) 200 if (!sputrace_log)
202 goto out; 201 goto out;
203 202
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index bc818e4e2033..bb464d1211b2 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
41#include <asm/iseries/hv_call_event.h> 41#include <asm/iseries/hv_call_event.h>
42#include <asm/iseries/iommu.h> 42#include <asm/iseries/iommu.h>
43 43
44static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, 44static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
45 unsigned long uaddr, enum dma_data_direction direction, 45 unsigned long uaddr, enum dma_data_direction direction,
46 struct dma_attrs *attrs) 46 struct dma_attrs *attrs)
47{ 47{
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
71 index++; 71 index++;
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 } 73 }
74 return 0;
74} 75}
75 76
76static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) 77static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 70541b7a5013..a0ff03a3d8da 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
83static struct iommu_table iommu_table_iobmap; 83static struct iommu_table iommu_table_iobmap;
84static int iommu_table_iobmap_inited; 84static int iommu_table_iobmap_inited;
85 85
86static void iobmap_build(struct iommu_table *tbl, long index, 86static int iobmap_build(struct iommu_table *tbl, long index,
87 long npages, unsigned long uaddr, 87 long npages, unsigned long uaddr,
88 enum dma_data_direction direction, 88 enum dma_data_direction direction,
89 struct dma_attrs *attrs) 89 struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
108 uaddr += IOBMAP_PAGE_SIZE; 108 uaddr += IOBMAP_PAGE_SIZE;
109 bus_addr += IOBMAP_PAGE_SIZE; 109 bus_addr += IOBMAP_PAGE_SIZE;
110 } 110 }
111 return 0;
111} 112}
112 113
113 114
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 757c0296e0b8..97619fd51e39 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
40 depends on PPC_PSERIES && PPC_EARLY_DEBUG 40 depends on PPC_PSERIES && PPC_EARLY_DEBUG
41 bool "Enable extra debug logging in platforms/pseries" 41 bool "Enable extra debug logging in platforms/pseries"
42 default y 42 default y
43
44config PPC_SMLPAR
45 bool "Support for shared-memory logical partitions"
46 depends on PPC_PSERIES
47 select LPARCFG
48 default n
49 help
50 Select this option to enable shared memory partition support.
51 With this option a system running in an LPAR can be given more
52 memory than physically available and will allow firmware to
53 balance memory across many LPARs.
54
55config CMM
56 tristate "Collaborative memory management"
57 depends on PPC_SMLPAR
58 default y
59 help
60 Select this option, if you want to enable the kernel interface
61 to reduce the memory size of the system. This is accomplished
62 by allocating pages of memory and put them "on hold". This only
63 makes sense for a system running in an LPAR where the unused pages
64 will be reused for other LPARs. The interface allows firmware to
65 balance memory across many LPARs.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 554c6e42ef2a..dfe574af2dc0 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
24obj-$(CONFIG_HVCS) += hvcserver.o 24obj-$(CONFIG_HVCS) += hvcserver.o
25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o 25obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o 26obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
27obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 000000000000..c6b3be03168b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
1/*
2 * Collaborative memory management interface.
3 *
4 * Copyright (C) 2008 IBM Corporation
5 * Author(s): Brian King (brking@linux.vnet.ibm.com),
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23#include <linux/ctype.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/init.h>
28#include <linux/kthread.h>
29#include <linux/module.h>
30#include <linux/oom.h>
31#include <linux/sched.h>
32#include <linux/stringify.h>
33#include <linux/swap.h>
34#include <linux/sysdev.h>
35#include <asm/firmware.h>
36#include <asm/hvcall.h>
37#include <asm/mmu.h>
38#include <asm/pgalloc.h>
39#include <asm/uaccess.h>
40
41#include "plpar_wrappers.h"
42
43#define CMM_DRIVER_VERSION "1.0.0"
44#define CMM_DEFAULT_DELAY 1
45#define CMM_DEBUG 0
46#define CMM_DISABLE 0
47#define CMM_OOM_KB 1024
48#define CMM_MIN_MEM_MB 256
49#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
50#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
51
52static unsigned int delay = CMM_DEFAULT_DELAY;
53static unsigned int oom_kb = CMM_OOM_KB;
54static unsigned int cmm_debug = CMM_DEBUG;
55static unsigned int cmm_disabled = CMM_DISABLE;
56static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
57static struct sys_device cmm_sysdev;
58
59MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
60MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
61MODULE_LICENSE("GPL");
62MODULE_VERSION(CMM_DRIVER_VERSION);
63
64module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
65MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
66 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
67module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
68MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
69 "[Default=" __stringify(CMM_OOM_KB) "]");
70module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
71MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
72 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
73module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
74MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
75 "[Default=" __stringify(CMM_DEBUG) "]");
76
77#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
78
79#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
80
81struct cmm_page_array {
82 struct cmm_page_array *next;
83 unsigned long index;
84 unsigned long page[CMM_NR_PAGES];
85};
86
87static unsigned long loaned_pages;
88static unsigned long loaned_pages_target;
89static unsigned long oom_freed_pages;
90
91static struct cmm_page_array *cmm_page_list;
92static DEFINE_SPINLOCK(cmm_lock);
93
94static struct task_struct *cmm_thread_ptr;
95
96/**
97 * cmm_alloc_pages - Allocate pages and mark them as loaned
98 * @nr: number of pages to allocate
99 *
100 * Return value:
101 * number of pages requested to be allocated which were not
102 **/
103static long cmm_alloc_pages(long nr)
104{
105 struct cmm_page_array *pa, *npa;
106 unsigned long addr;
107 long rc;
108
109 cmm_dbg("Begin request for %ld pages\n", nr);
110
111 while (nr) {
112 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
113 __GFP_NORETRY | __GFP_NOMEMALLOC);
114 if (!addr)
115 break;
116 spin_lock(&cmm_lock);
117 pa = cmm_page_list;
118 if (!pa || pa->index >= CMM_NR_PAGES) {
119 /* Need a new page for the page list. */
120 spin_unlock(&cmm_lock);
121 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
122 __GFP_NORETRY | __GFP_NOMEMALLOC);
123 if (!npa) {
124 pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
125 free_page(addr);
126 break;
127 }
128 spin_lock(&cmm_lock);
129 pa = cmm_page_list;
130
131 if (!pa || pa->index >= CMM_NR_PAGES) {
132 npa->next = pa;
133 npa->index = 0;
134 pa = npa;
135 cmm_page_list = pa;
136 } else
137 free_page((unsigned long) npa);
138 }
139
140 if ((rc = plpar_page_set_loaned(__pa(addr)))) {
141 pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
142 spin_unlock(&cmm_lock);
143 free_page(addr);
144 break;
145 }
146
147 pa->page[pa->index++] = addr;
148 loaned_pages++;
149 totalram_pages--;
150 spin_unlock(&cmm_lock);
151 nr--;
152 }
153
154 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
155 return nr;
156}
157
158/**
159 * cmm_free_pages - Free pages and mark them as active
160 * @nr: number of pages to free
161 *
162 * Return value:
163 * number of pages requested to be freed which were not
164 **/
165static long cmm_free_pages(long nr)
166{
167 struct cmm_page_array *pa;
168 unsigned long addr;
169
170 cmm_dbg("Begin free of %ld pages.\n", nr);
171 spin_lock(&cmm_lock);
172 pa = cmm_page_list;
173 while (nr) {
174 if (!pa || pa->index <= 0)
175 break;
176 addr = pa->page[--pa->index];
177
178 if (pa->index == 0) {
179 pa = pa->next;
180 free_page((unsigned long) cmm_page_list);
181 cmm_page_list = pa;
182 }
183
184 plpar_page_set_active(__pa(addr));
185 free_page(addr);
186 loaned_pages--;
187 nr--;
188 totalram_pages++;
189 }
190 spin_unlock(&cmm_lock);
191 cmm_dbg("End request with %ld pages unfulfilled\n", nr);
192 return nr;
193}
194
195/**
196 * cmm_oom_notify - OOM notifier
197 * @self: notifier block struct
198 * @dummy: not used
199 * @parm: returned - number of pages freed
200 *
201 * Return value:
202 * NOTIFY_OK
203 **/
204static int cmm_oom_notify(struct notifier_block *self,
205 unsigned long dummy, void *parm)
206{
207 unsigned long *freed = parm;
208 long nr = KB2PAGES(oom_kb);
209
210 cmm_dbg("OOM processing started\n");
211 nr = cmm_free_pages(nr);
212 loaned_pages_target = loaned_pages;
213 *freed += KB2PAGES(oom_kb) - nr;
214 oom_freed_pages += KB2PAGES(oom_kb) - nr;
215 cmm_dbg("OOM processing complete\n");
216 return NOTIFY_OK;
217}
218
219/**
220 * cmm_get_mpp - Read memory performance parameters
221 *
222 * Makes hcall to query the current page loan request from the hypervisor.
223 *
224 * Return value:
225 * nothing
226 **/
227static void cmm_get_mpp(void)
228{
229 int rc;
230 struct hvcall_mpp_data mpp_data;
231 unsigned long active_pages_target;
232 signed long page_loan_request;
233
234 rc = h_get_mpp(&mpp_data);
235
236 if (rc != H_SUCCESS)
237 return;
238
239 page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
240 loaned_pages_target = page_loan_request + loaned_pages;
241 if (loaned_pages_target > oom_freed_pages)
242 loaned_pages_target -= oom_freed_pages;
243 else
244 loaned_pages_target = 0;
245
246 active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
247
248 if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
249 loaned_pages_target = totalram_pages + loaned_pages -
250 ((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
251
252 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
253 page_loan_request, loaned_pages, loaned_pages_target,
254 oom_freed_pages, totalram_pages);
255}
256
257static struct notifier_block cmm_oom_nb = {
258 .notifier_call = cmm_oom_notify
259};
260
261/**
262 * cmm_thread - CMM task thread
263 * @dummy: not used
264 *
265 * Return value:
266 * 0
267 **/
268static int cmm_thread(void *dummy)
269{
270 unsigned long timeleft;
271
272 while (1) {
273 timeleft = msleep_interruptible(delay * 1000);
274
275 if (kthread_should_stop() || timeleft) {
276 loaned_pages_target = loaned_pages;
277 break;
278 }
279
280 cmm_get_mpp();
281
282 if (loaned_pages_target > loaned_pages) {
283 if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
284 loaned_pages_target = loaned_pages;
285 } else if (loaned_pages_target < loaned_pages)
286 cmm_free_pages(loaned_pages - loaned_pages_target);
287 }
288 return 0;
289}
290
291#define CMM_SHOW(name, format, args...) \
292 static ssize_t show_##name(struct sys_device *dev, char *buf) \
293 { \
294 return sprintf(buf, format, ##args); \
295 } \
296 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
297
298CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
299CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
300
301static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
302{
303 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
304}
305
306static ssize_t store_oom_pages(struct sys_device *dev,
307 const char *buf, size_t count)
308{
309 unsigned long val = simple_strtoul (buf, NULL, 10);
310
311 if (!capable(CAP_SYS_ADMIN))
312 return -EPERM;
313 if (val != 0)
314 return -EBADMSG;
315
316 oom_freed_pages = 0;
317 return count;
318}
319
320static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
321 show_oom_pages, store_oom_pages);
322
323static struct sysdev_attribute *cmm_attrs[] = {
324 &attr_loaned_kb,
325 &attr_loaned_target_kb,
326 &attr_oom_freed_kb,
327};
328
329static struct sysdev_class cmm_sysdev_class = {
330 .name = "cmm",
331};
332
333/**
334 * cmm_sysfs_register - Register with sysfs
335 *
336 * Return value:
337 * 0 on success / other on failure
338 **/
339static int cmm_sysfs_register(struct sys_device *sysdev)
340{
341 int i, rc;
342
343 if ((rc = sysdev_class_register(&cmm_sysdev_class)))
344 return rc;
345
346 sysdev->id = 0;
347 sysdev->cls = &cmm_sysdev_class;
348
349 if ((rc = sysdev_register(sysdev)))
350 goto class_unregister;
351
352 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
353 if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
354 goto fail;
355 }
356
357 return 0;
358
359fail:
360 while (--i >= 0)
361 sysdev_remove_file(sysdev, cmm_attrs[i]);
362 sysdev_unregister(sysdev);
363class_unregister:
364 sysdev_class_unregister(&cmm_sysdev_class);
365 return rc;
366}
367
368/**
369 * cmm_unregister_sysfs - Unregister from sysfs
370 *
371 **/
372static void cmm_unregister_sysfs(struct sys_device *sysdev)
373{
374 int i;
375
376 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
377 sysdev_remove_file(sysdev, cmm_attrs[i]);
378 sysdev_unregister(sysdev);
379 sysdev_class_unregister(&cmm_sysdev_class);
380}
381
382/**
383 * cmm_init - Module initialization
384 *
385 * Return value:
386 * 0 on success / other on failure
387 **/
388static int cmm_init(void)
389{
390 int rc = -ENOMEM;
391
392 if (!firmware_has_feature(FW_FEATURE_CMO))
393 return -EOPNOTSUPP;
394
395 if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
396 return rc;
397
398 if ((rc = cmm_sysfs_register(&cmm_sysdev)))
399 goto out_oom_notifier;
400
401 if (cmm_disabled)
402 return rc;
403
404 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
405 if (IS_ERR(cmm_thread_ptr)) {
406 rc = PTR_ERR(cmm_thread_ptr);
407 goto out_unregister_sysfs;
408 }
409
410 return rc;
411
412out_unregister_sysfs:
413 cmm_unregister_sysfs(&cmm_sysdev);
414out_oom_notifier:
415 unregister_oom_notifier(&cmm_oom_nb);
416 return rc;
417}
418
419/**
420 * cmm_exit - Module exit
421 *
422 * Return value:
423 * nothing
424 **/
425static void cmm_exit(void)
426{
427 if (cmm_thread_ptr)
428 kthread_stop(cmm_thread_ptr);
429 unregister_oom_notifier(&cmm_oom_nb);
430 cmm_free_pages(loaned_pages);
431 cmm_unregister_sysfs(&cmm_sysdev);
432}
433
434/**
435 * cmm_set_disable - Disable/Enable CMM
436 *
437 * Return value:
438 * 0 on success / other on failure
439 **/
440static int cmm_set_disable(const char *val, struct kernel_param *kp)
441{
442 int disable = simple_strtoul(val, NULL, 10);
443
444 if (disable != 0 && disable != 1)
445 return -EINVAL;
446
447 if (disable && !cmm_disabled) {
448 if (cmm_thread_ptr)
449 kthread_stop(cmm_thread_ptr);
450 cmm_thread_ptr = NULL;
451 cmm_free_pages(loaned_pages);
452 } else if (!disable && cmm_disabled) {
453 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
454 if (IS_ERR(cmm_thread_ptr))
455 return PTR_ERR(cmm_thread_ptr);
456 }
457
458 cmm_disabled = disable;
459 return 0;
460}
461
462module_param_call(disable, cmm_set_disable, param_get_uint,
463 &cmm_disabled, S_IRUGO | S_IWUSR);
464MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
465 "[Default=" __stringify(CMM_DISABLE) "]");
466
467module_init(cmm_init);
468module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 5377dd4b849a..a8c446697f9e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49 49
50 50
51static void tce_build_pSeries(struct iommu_table *tbl, long index, 51static int tce_build_pSeries(struct iommu_table *tbl, long index,
52 long npages, unsigned long uaddr, 52 long npages, unsigned long uaddr,
53 enum dma_data_direction direction, 53 enum dma_data_direction direction,
54 struct dma_attrs *attrs) 54 struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
72 uaddr += TCE_PAGE_SIZE; 72 uaddr += TCE_PAGE_SIZE;
73 tcep++; 73 tcep++;
74 } 74 }
75 return 0;
75} 76}
76 77
77 78
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
94 return *tcep; 95 return *tcep;
95} 96}
96 97
97static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, 98static void tce_free_pSeriesLP(struct iommu_table*, long, long);
99static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
100
101static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
98 long npages, unsigned long uaddr, 102 long npages, unsigned long uaddr,
99 enum dma_data_direction direction, 103 enum dma_data_direction direction,
100 struct dma_attrs *attrs) 104 struct dma_attrs *attrs)
101{ 105{
102 u64 rc; 106 u64 rc = 0;
103 u64 proto_tce, tce; 107 u64 proto_tce, tce;
104 u64 rpn; 108 u64 rpn;
109 int ret = 0;
110 long tcenum_start = tcenum, npages_start = npages;
105 111
106 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; 112 rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
107 proto_tce = TCE_PCI_READ; 113 proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
112 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; 118 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
113 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); 119 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
114 120
121 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
122 ret = (int)rc;
123 tce_free_pSeriesLP(tbl, tcenum_start,
124 (npages_start - (npages + 1)));
125 break;
126 }
127
115 if (rc && printk_ratelimit()) { 128 if (rc && printk_ratelimit()) {
116 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 129 printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
117 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 130 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
123 tcenum++; 136 tcenum++;
124 rpn++; 137 rpn++;
125 } 138 }
139 return ret;
126} 140}
127 141
128static DEFINE_PER_CPU(u64 *, tce_page) = NULL; 142static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
129 143
130static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 144static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
131 long npages, unsigned long uaddr, 145 long npages, unsigned long uaddr,
132 enum dma_data_direction direction, 146 enum dma_data_direction direction,
133 struct dma_attrs *attrs) 147 struct dma_attrs *attrs)
134{ 148{
135 u64 rc; 149 u64 rc = 0;
136 u64 proto_tce; 150 u64 proto_tce;
137 u64 *tcep; 151 u64 *tcep;
138 u64 rpn; 152 u64 rpn;
139 long l, limit; 153 long l, limit;
154 long tcenum_start = tcenum, npages_start = npages;
155 int ret = 0;
140 156
141 if (npages == 1) { 157 if (npages == 1) {
142 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 158 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
143 direction, attrs); 159 direction, attrs);
144 return;
145 } 160 }
146 161
147 tcep = __get_cpu_var(tce_page); 162 tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
153 tcep = (u64 *)__get_free_page(GFP_ATOMIC); 168 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
154 /* If allocation fails, fall back to the loop implementation */ 169 /* If allocation fails, fall back to the loop implementation */
155 if (!tcep) { 170 if (!tcep) {
156 tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, 171 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
157 direction, attrs); 172 direction, attrs);
158 return;
159 } 173 }
160 __get_cpu_var(tce_page) = tcep; 174 __get_cpu_var(tce_page) = tcep;
161 } 175 }
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
187 tcenum += limit; 201 tcenum += limit;
188 } while (npages > 0 && !rc); 202 } while (npages > 0 && !rc);
189 203
204 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
205 ret = (int)rc;
206 tce_freemulti_pSeriesLP(tbl, tcenum_start,
207 (npages_start - (npages + limit)));
208 return ret;
209 }
210
190 if (rc && printk_ratelimit()) { 211 if (rc && printk_ratelimit()) {
191 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); 212 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
192 printk("\tindex = 0x%lx\n", (u64)tbl->it_index); 213 printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
194 printk("\ttce[0] val = 0x%lx\n", tcep[0]); 215 printk("\ttce[0] val = 0x%lx\n", tcep[0]);
195 show_stack(current, (unsigned long *)__get_SP()); 216 show_stack(current, (unsigned long *)__get_SP());
196 } 217 }
218 return ret;
197} 219}
198 220
199static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) 221static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d8680b589dc9..a437267c6bf8 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
42 return vpa_call(0x3, cpu, vpa); 42 return vpa_call(0x3, cpu, vpa);
43} 43}
44 44
45static inline long plpar_page_set_loaned(unsigned long vpa)
46{
47 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
48}
49
50static inline long plpar_page_set_active(unsigned long vpa)
51{
52 return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
53}
54
45extern void vpa_init(int cpu); 55extern void vpa_init(int cpu);
46 56
47static inline long plpar_pte_enter(unsigned long flags, 57static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 90beb444e1dd..063a0d2fba30 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
314 H_DABRX_KERNEL | H_DABRX_USER); 314 H_DABRX_KERNEL | H_DABRX_USER);
315} 315}
316 316
317#define CMO_CHARACTERISTICS_TOKEN 44
318#define CMO_MAXLENGTH 1026
319
320/**
321 * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
322 * handle that here. (Stolen from parse_system_parameter_string)
323 */
324void pSeries_cmo_feature_init(void)
325{
326 char *ptr, *key, *value, *end;
327 int call_status;
328 int PrPSP = -1;
329 int SecPSP = -1;
330
331 pr_debug(" -> fw_cmo_feature_init()\n");
332 spin_lock(&rtas_data_buf_lock);
333 memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
334 call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
335 NULL,
336 CMO_CHARACTERISTICS_TOKEN,
337 __pa(rtas_data_buf),
338 RTAS_DATA_BUF_SIZE);
339
340 if (call_status != 0) {
341 spin_unlock(&rtas_data_buf_lock);
342 pr_debug("CMO not available\n");
343 pr_debug(" <- fw_cmo_feature_init()\n");
344 return;
345 }
346
347 end = rtas_data_buf + CMO_MAXLENGTH - 2;
348 ptr = rtas_data_buf + 2; /* step over strlen value */
349 key = value = ptr;
350
351 while (*ptr && (ptr <= end)) {
352 /* Separate the key and value by replacing '=' with '\0' and
353 * point the value at the string after the '='
354 */
355 if (ptr[0] == '=') {
356 ptr[0] = '\0';
357 value = ptr + 1;
358 } else if (ptr[0] == '\0' || ptr[0] == ',') {
359 /* Terminate the string containing the key/value pair */
360 ptr[0] = '\0';
361
362 if (key == value) {
363 pr_debug("Malformed key/value pair\n");
364 /* Never found a '=', end processing */
365 break;
366 }
367
368 if (0 == strcmp(key, "PrPSP"))
369 PrPSP = simple_strtol(value, NULL, 10);
370 else if (0 == strcmp(key, "SecPSP"))
371 SecPSP = simple_strtol(value, NULL, 10);
372 value = key = ptr + 1;
373 }
374 ptr++;
375 }
376
377 if (PrPSP != -1 || SecPSP != -1) {
378 pr_info("CMO enabled\n");
379 pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
380 powerpc_firmware_features |= FW_FEATURE_CMO;
381 } else
382 pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
383 spin_unlock(&rtas_data_buf_lock);
384 pr_debug(" <- fw_cmo_feature_init()\n");
385}
386
317/* 387/*
318 * Early initialization. Relocation is on but do not reference unbolted pages 388 * Early initialization. Relocation is on but do not reference unbolted pages
319 */ 389 */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
329 else if (firmware_has_feature(FW_FEATURE_XDABR)) 399 else if (firmware_has_feature(FW_FEATURE_XDABR))
330 ppc_md.set_dabr = pseries_set_xdabr; 400 ppc_md.set_dabr = pseries_set_xdabr;
331 401
402 pSeries_cmo_feature_init();
332 iommu_init_early_pSeries(); 403 iommu_init_early_pSeries();
333 404
334 pr_debug(" <- pSeries_init_early()\n"); 405 pr_debug(" <- pSeries_init_early()\n");