author    Heiko Carstens <heiko.carstens@de.ibm.com>    2006-12-08 09:56:07 -0500
committer Martin Schwidefsky <schwidefsky@de.ibm.com>   2006-12-08 09:56:07 -0500
commit    f4eb07c17df2e6cf9bd58bfcd9cc9e05e9489d07 (patch)
tree      c1b4b422d3b8183edf452cc745dadd0fe129018b
parent    7f090145a14afc35844dce80174c9c24f9e66ec5 (diff)
[S390] Virtual memmap for s390.
Virtual memmap support for s390, inspired by the ia64 implementation.

Unlike ia64 we need a mechanism which allows us to dynamically attach shared
memory regions. These memory regions are accessed via the dcss device driver.
dcss implements the 'direct_access' operation, which requires struct pages for
every single shared page. Therefore this implementation provides an interface
to attach/detach shared memory:

int add_shared_memory(unsigned long start, unsigned long size);
int remove_shared_memory(unsigned long start, unsigned long size);

The purpose of add_shared_memory is to add the given memory range to the 1:1
mapping and to make sure that the corresponding range in the vmemmap is backed
with physical pages. It also initialises the new struct pages.

remove_shared_memory in turn only invalidates the page table entries in the
1:1 mapping. The page tables and the memory used for struct pages in the
vmemmap are currently not freed; they will be reused when the next segment is
attached. Given that the maximum size of a shared memory region is 2GB and, in
addition, all regions must reside below 2GB, this is not too much of a
restriction, but there is room for improvement.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
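To illustrate how this interface is meant to be consumed, here is a minimal,
hypothetical caller sketch in the style of the dcss driver changes below. The
segment start address and size are made up for the example; only
add_shared_memory()/remove_shared_memory() and their error codes come from
this patch.

#include <linux/errno.h>
#include <asm/pgtable.h>	/* add_shared_memory(), remove_shared_memory() */

/*
 * Hypothetical caller: back a shared segment with struct pages and add it
 * to the kernel 1:1 mapping before exposing it via direct_access.
 * The start/size values are examples only.
 */
static int example_attach_segment(void)
{
	unsigned long start = 0x20000000UL;	/* assumed segment start */
	unsigned long size  = 0x01000000UL;	/* assumed segment size  */
	int rc;

	rc = add_shared_memory(start, size);
	switch (rc) {
	case 0:
		break;		/* 1:1 mapping and vmemmap range are backed */
	case -ENOSPC:
		/* overlaps main storage or an already attached segment */
		return rc;
	case -ERANGE:
		/* extends beyond the kernel mapping range or wraps around */
		return rc;
	default:
		return rc;	/* e.g. -ENOMEM from page table allocation */
	}

	/* ... the range is usable now; pfn_to_page() works for it ... */

	/* undo: only invalidates the 1:1 mapping, backing pages are kept */
	remove_shared_memory(start, size);
	return 0;
}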
-rw-r--r--  arch/s390/Kconfig             3
-rw-r--r--  arch/s390/kernel/setup.c      2
-rw-r--r--  arch/s390/mm/Makefile         2
-rw-r--r--  arch/s390/mm/extmem.c       106
-rw-r--r--  arch/s390/mm/init.c         163
-rw-r--r--  arch/s390/mm/vmem.c         381
-rw-r--r--  include/asm-s390/page.h      22
-rw-r--r--  include/asm-s390/pgalloc.h    3
-rw-r--r--  include/asm-s390/pgtable.h   16
9 files changed, 488 insertions, 210 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a08e9100e7e4..f12ca8fba71b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -235,6 +235,9 @@ config WARN_STACK_SIZE
235 235
236source "mm/Kconfig" 236source "mm/Kconfig"
237 237
238config HOLES_IN_ZONE
239 def_bool y
240
238comment "I/O subsystem configuration" 241comment "I/O subsystem configuration"
239 242
240config MACHCHK_WARNING 243config MACHCHK_WARNING
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b928fecdc743..b8a1ce215142 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -64,7 +64,7 @@ unsigned int console_devno = -1;
64unsigned int console_irq = -1; 64unsigned int console_irq = -1;
65unsigned long machine_flags = 0; 65unsigned long machine_flags = 0;
66 66
67struct mem_chunk memory_chunk[MEMORY_CHUNKS]; 67struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
68volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ 68volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
69unsigned long __initdata zholes_size[MAX_NR_ZONES]; 69unsigned long __initdata zholes_size[MAX_NR_ZONES];
70static unsigned long __initdata memory_end; 70static unsigned long __initdata memory_end;
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index aa9a42b6e62d..8e09db1edbb9 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,6 +2,6 @@
2# Makefile for the linux s390-specific parts of the memory manager. 2# Makefile for the linux s390-specific parts of the memory manager.
3# 3#
4 4
5obj-y := init.o fault.o ioremap.o extmem.o mmap.o 5obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o
6obj-$(CONFIG_CMM) += cmm.o 6obj-$(CONFIG_CMM) += cmm.o
7 7
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 9e9bc48463a5..775bf19e742b 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -16,6 +16,7 @@
16#include <linux/bootmem.h> 16#include <linux/bootmem.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/pgtable.h>
19#include <asm/ebcdic.h> 20#include <asm/ebcdic.h>
20#include <asm/errno.h> 21#include <asm/errno.h>
21#include <asm/extmem.h> 22#include <asm/extmem.h>
@@ -238,65 +239,6 @@ query_segment_type (struct dcss_segment *seg)
238} 239}
239 240
240/* 241/*
241 * check if the given segment collides with guest storage.
242 * returns 1 if this is the case, 0 if no collision was found
243 */
244static int
245segment_overlaps_storage(struct dcss_segment *seg)
246{
247 int i;
248
249 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
250 if (memory_chunk[i].type != CHUNK_READ_WRITE)
251 continue;
252 if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
253 continue;
254 if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
255 < (seg->start_addr >> 20))
256 continue;
257 return 1;
258 }
259 return 0;
260}
261
262/*
263 * check if segment collides with other segments that are currently loaded
264 * returns 1 if this is the case, 0 if no collision was found
265 */
266static int
267segment_overlaps_others (struct dcss_segment *seg)
268{
269 struct list_head *l;
270 struct dcss_segment *tmp;
271
272 BUG_ON(!mutex_is_locked(&dcss_lock));
273 list_for_each(l, &dcss_list) {
274 tmp = list_entry(l, struct dcss_segment, list);
275 if ((tmp->start_addr >> 20) > (seg->end >> 20))
276 continue;
277 if ((tmp->end >> 20) < (seg->start_addr >> 20))
278 continue;
279 if (seg == tmp)
280 continue;
281 return 1;
282 }
283 return 0;
284}
285
286/*
287 * check if segment exceeds the kernel mapping range (detected or set via mem=)
288 * returns 1 if this is the case, 0 if segment fits into the range
289 */
290static inline int
291segment_exceeds_range (struct dcss_segment *seg)
292{
293 int seg_last_pfn = (seg->end) >> PAGE_SHIFT;
294 if (seg_last_pfn > max_pfn)
295 return 1;
296 return 0;
297}
298
299/*
300 * get info about a segment 242 * get info about a segment
301 * possible return values: 243 * possible return values:
302 * -ENOSYS : we are not running on VM 244 * -ENOSYS : we are not running on VM
@@ -341,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
341 rc = query_segment_type (seg); 283 rc = query_segment_type (seg);
342 if (rc < 0) 284 if (rc < 0)
343 goto out_free; 285 goto out_free;
344 if (segment_exceeds_range(seg)) { 286
345 PRINT_WARN ("segment_load: not loading segment %s - exceeds" 287 rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
346 " kernel mapping range\n",name); 288
347 rc = -ERANGE; 289 switch (rc) {
290 case 0:
291 break;
292 case -ENOSPC:
293 PRINT_WARN("segment_load: not loading segment %s - overlaps "
294 "storage/segment\n", name);
348 goto out_free; 295 goto out_free;
349 } 296 case -ERANGE:
350 if (segment_overlaps_storage(seg)) { 297 PRINT_WARN("segment_load: not loading segment %s - exceeds "
351 PRINT_WARN ("segment_load: not loading segment %s - overlaps" 298 "kernel mapping range\n", name);
352 " storage\n",name);
353 rc = -ENOSPC;
354 goto out_free; 299 goto out_free;
355 } 300 default:
356 if (segment_overlaps_others(seg)) { 301 PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
357 PRINT_WARN ("segment_load: not loading segment %s - overlaps" 302 name, rc);
358 " other segments\n",name);
359 rc = -EBUSY;
360 goto out_free; 303 goto out_free;
361 } 304 }
305
362 if (do_nonshared) 306 if (do_nonshared)
363 dcss_command = DCSS_LOADNSR; 307 dcss_command = DCSS_LOADNSR;
364 else 308 else
@@ -372,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
372 rc = dcss_diag_translate_rc (seg->end); 316 rc = dcss_diag_translate_rc (seg->end);
373 dcss_diag(DCSS_PURGESEG, seg->dcss_name, 317 dcss_diag(DCSS_PURGESEG, seg->dcss_name,
374 &seg->start_addr, &seg->end); 318 &seg->start_addr, &seg->end);
375 goto out_free; 319 goto out_shared;
376 } 320 }
377 seg->do_nonshared = do_nonshared; 321 seg->do_nonshared = do_nonshared;
378 atomic_set(&seg->ref_count, 1); 322 atomic_set(&seg->ref_count, 1);
@@ -391,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
391 (void*)seg->start_addr, (void*)seg->end, 335 (void*)seg->start_addr, (void*)seg->end,
392 segtype_string[seg->vm_segtype]); 336 segtype_string[seg->vm_segtype]);
393 goto out; 337 goto out;
338 out_shared:
339 remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
394 out_free: 340 out_free:
395 kfree(seg); 341 kfree(seg);
396 out: 342 out:
@@ -530,12 +476,12 @@ segment_unload(char *name)
530 "please report to linux390@de.ibm.com\n",name); 476 "please report to linux390@de.ibm.com\n",name);
531 goto out_unlock; 477 goto out_unlock;
532 } 478 }
533 if (atomic_dec_return(&seg->ref_count) == 0) { 479 if (atomic_dec_return(&seg->ref_count) != 0)
534 list_del(&seg->list); 480 goto out_unlock;
535 dcss_diag(DCSS_PURGESEG, seg->dcss_name, 481 remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
536 &dummy, &dummy); 482 list_del(&seg->list);
537 kfree(seg); 483 dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
538 } 484 kfree(seg);
539out_unlock: 485out_unlock:
540 mutex_unlock(&dcss_lock); 486 mutex_unlock(&dcss_lock);
541} 487}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index e1881c31b1cb..5ea12a573cad 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -69,6 +69,8 @@ void show_mem(void)
69 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 69 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
70 i = max_mapnr; 70 i = max_mapnr;
71 while (i-- > 0) { 71 while (i-- > 0) {
72 if (!pfn_valid(i))
73 continue;
72 page = pfn_to_page(i); 74 page = pfn_to_page(i);
73 total++; 75 total++;
74 if (PageReserved(page)) 76 if (PageReserved(page))
@@ -84,67 +86,53 @@ void show_mem(void)
84 printk("%d pages swap cached\n",cached); 86 printk("%d pages swap cached\n",cached);
85} 87}
86 88
89static void __init setup_ro_region(void)
90{
91 pgd_t *pgd;
92 pmd_t *pmd;
93 pte_t *pte;
94 pte_t new_pte;
95 unsigned long address, end;
96
97 address = ((unsigned long)&__start_rodata) & PAGE_MASK;
98 end = PFN_ALIGN((unsigned long)&__end_rodata);
99
100 for (; address < end; address += PAGE_SIZE) {
101 pgd = pgd_offset_k(address);
102 pmd = pmd_offset(pgd, address);
103 pte = pte_offset_kernel(pmd, address);
104 new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
105 set_pte(pte, new_pte);
106 }
107}
108
87extern unsigned long __initdata zholes_size[]; 109extern unsigned long __initdata zholes_size[];
110extern void vmem_map_init(void);
88/* 111/*
89 * paging_init() sets up the page tables 112 * paging_init() sets up the page tables
90 */ 113 */
91
92#ifndef CONFIG_64BIT
93void __init paging_init(void) 114void __init paging_init(void)
94{ 115{
95 pgd_t * pg_dir; 116 pgd_t *pg_dir;
96 pte_t * pg_table; 117 int i;
97 pte_t pte; 118 unsigned long pgdir_k;
98 int i; 119 static const int ssm_mask = 0x04000000L;
99 unsigned long tmp;
100 unsigned long pfn = 0;
101 unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
102 static const int ssm_mask = 0x04000000L;
103 unsigned long ro_start_pfn, ro_end_pfn;
104 unsigned long zones_size[MAX_NR_ZONES]; 120 unsigned long zones_size[MAX_NR_ZONES];
121 unsigned long dma_pfn, high_pfn;
105 122
106 ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); 123 pg_dir = swapper_pg_dir;
107 ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
108
109 memset(zones_size, 0, sizeof(zones_size));
110 zones_size[ZONE_DMA] = max_low_pfn;
111 free_area_init_node(0, &contig_page_data, zones_size,
112 __pa(PAGE_OFFSET) >> PAGE_SHIFT,
113 zholes_size);
114
115 /* unmap whole virtual address space */
116 124
117 pg_dir = swapper_pg_dir; 125#ifdef CONFIG_64BIT
118 126 pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
119 for (i = 0; i < PTRS_PER_PGD; i++) 127 for (i = 0; i < PTRS_PER_PGD; i++)
120 pmd_clear((pmd_t *) pg_dir++); 128 pgd_clear(pg_dir + i);
121 129#else
122 /* 130 pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
123 * map whole physical memory to virtual memory (identity mapping) 131 for (i = 0; i < PTRS_PER_PGD; i++)
124 */ 132 pmd_clear((pmd_t *)(pg_dir + i));
125 133#endif
126 pg_dir = swapper_pg_dir; 134 vmem_map_init();
127 135 setup_ro_region();
128 while (pfn < max_low_pfn) {
129 /*
130 * pg_table is physical at this point
131 */
132 pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
133
134 pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
135 pg_dir++;
136
137 for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
138 if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
139 pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
140 else
141 pte = pfn_pte(pfn, PAGE_KERNEL);
142 if (pfn >= max_low_pfn)
143 pte_val(pte) = _PAGE_TYPE_EMPTY;
144 set_pte(pg_table, pte);
145 pfn++;
146 }
147 }
148 136
149 S390_lowcore.kernel_asce = pgdir_k; 137 S390_lowcore.kernel_asce = pgdir_k;
150 138
@@ -154,31 +142,9 @@ void __init paging_init(void)
154 __ctl_load(pgdir_k, 13, 13); 142 __ctl_load(pgdir_k, 13, 13);
155 __raw_local_irq_ssm(ssm_mask); 143 __raw_local_irq_ssm(ssm_mask);
156 144
157 local_flush_tlb();
158}
159
160#else /* CONFIG_64BIT */
161
162void __init paging_init(void)
163{
164 pgd_t * pg_dir;
165 pmd_t * pm_dir;
166 pte_t * pt_dir;
167 pte_t pte;
168 int i,j,k;
169 unsigned long pfn = 0;
170 unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
171 _KERN_REGION_TABLE;
172 static const int ssm_mask = 0x04000000L;
173 unsigned long zones_size[MAX_NR_ZONES];
174 unsigned long dma_pfn, high_pfn;
175 unsigned long ro_start_pfn, ro_end_pfn;
176
177 memset(zones_size, 0, sizeof(zones_size)); 145 memset(zones_size, 0, sizeof(zones_size));
178 dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; 146 dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
179 high_pfn = max_low_pfn; 147 high_pfn = max_low_pfn;
180 ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
181 ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
182 148
183 if (dma_pfn > high_pfn) 149 if (dma_pfn > high_pfn)
184 zones_size[ZONE_DMA] = high_pfn; 150 zones_size[ZONE_DMA] = high_pfn;
@@ -190,56 +156,7 @@ void __init paging_init(void)
190 /* Initialize mem_map[]. */ 156 /* Initialize mem_map[]. */
191 free_area_init_node(0, &contig_page_data, zones_size, 157 free_area_init_node(0, &contig_page_data, zones_size,
192 __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); 158 __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
193
194 /*
195 * map whole physical memory to virtual memory (identity mapping)
196 */
197
198 pg_dir = swapper_pg_dir;
199
200 for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
201
202 if (pfn >= max_low_pfn) {
203 pgd_clear(pg_dir);
204 continue;
205 }
206
207 pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4);
208 pgd_populate(&init_mm, pg_dir, pm_dir);
209
210 for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
211 if (pfn >= max_low_pfn) {
212 pmd_clear(pm_dir);
213 continue;
214 }
215
216 pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
217 pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
218
219 for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
220 if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
221 pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
222 else
223 pte = pfn_pte(pfn, PAGE_KERNEL);
224 if (pfn >= max_low_pfn)
225 pte_val(pte) = _PAGE_TYPE_EMPTY;
226 set_pte(pt_dir, pte);
227 pfn++;
228 }
229 }
230 }
231
232 S390_lowcore.kernel_asce = pgdir_k;
233
234 /* enable virtual mapping in kernel mode */
235 __ctl_load(pgdir_k, 1, 1);
236 __ctl_load(pgdir_k, 7, 7);
237 __ctl_load(pgdir_k, 13, 13);
238 __raw_local_irq_ssm(ssm_mask);
239
240 local_flush_tlb();
241} 159}
242#endif /* CONFIG_64BIT */
243 160
244void __init mem_init(void) 161void __init mem_init(void)
245{ 162{
@@ -269,6 +186,8 @@ void __init mem_init(void)
269 printk("Write protected kernel read-only data: %#lx - %#lx\n", 186 printk("Write protected kernel read-only data: %#lx - %#lx\n",
270 (unsigned long)&__start_rodata, 187 (unsigned long)&__start_rodata,
271 PFN_ALIGN((unsigned long)&__end_rodata) - 1); 188 PFN_ALIGN((unsigned long)&__end_rodata) - 1);
189 printk("Virtual memmap size: %ldk\n",
190 (max_pfn * sizeof(struct page)) >> 10);
272} 191}
273 192
274void free_initmem(void) 193void free_initmem(void)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 000000000000..7f2944d3ec2a
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,381 @@
1/*
2 * arch/s390/mm/vmem.c
3 *
4 * Copyright IBM Corp. 2006
5 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
6 */
7
8#include <linux/bootmem.h>
9#include <linux/pfn.h>
10#include <linux/mm.h>
11#include <linux/module.h>
12#include <linux/list.h>
13#include <asm/pgalloc.h>
14#include <asm/pgtable.h>
15#include <asm/setup.h>
16#include <asm/tlbflush.h>
17
18unsigned long vmalloc_end;
19EXPORT_SYMBOL(vmalloc_end);
20
21static struct page *vmem_map;
22static DEFINE_MUTEX(vmem_mutex);
23
24struct memory_segment {
25 struct list_head list;
26 unsigned long start;
27 unsigned long size;
28};
29
30static LIST_HEAD(mem_segs);
31
32void memmap_init(unsigned long size, int nid, unsigned long zone,
33 unsigned long start_pfn)
34{
35 struct page *start, *end;
36 struct page *map_start, *map_end;
37 int i;
38
39 start = pfn_to_page(start_pfn);
40 end = start + size;
41
42 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
43 unsigned long cstart, cend;
44
45 cstart = PFN_DOWN(memory_chunk[i].addr);
46 cend = cstart + PFN_DOWN(memory_chunk[i].size);
47
48 map_start = mem_map + cstart;
49 map_end = mem_map + cend;
50
51 if (map_start < start)
52 map_start = start;
53 if (map_end > end)
54 map_end = end;
55
56 map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
57 / sizeof(struct page);
58 map_end += ((PFN_ALIGN((unsigned long) map_end)
59 - (unsigned long) map_end)
60 / sizeof(struct page));
61
62 if (map_start < map_end)
63 memmap_init_zone((unsigned long)(map_end - map_start),
64 nid, zone, page_to_pfn(map_start));
65 }
66}
67
68static inline void *vmem_alloc_pages(unsigned int order)
69{
70 if (slab_is_available())
71 return (void *)__get_free_pages(GFP_KERNEL, order);
72 return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
73}
74
75static inline pmd_t *vmem_pmd_alloc(void)
76{
77 pmd_t *pmd;
78 int i;
79
80 pmd = vmem_alloc_pages(PMD_ALLOC_ORDER);
81 if (!pmd)
82 return NULL;
83 for (i = 0; i < PTRS_PER_PMD; i++)
84 pmd_clear(pmd + i);
85 return pmd;
86}
87
88static inline pte_t *vmem_pte_alloc(void)
89{
90 pte_t *pte;
91 pte_t empty_pte;
92 int i;
93
94 pte = vmem_alloc_pages(PTE_ALLOC_ORDER);
95 if (!pte)
96 return NULL;
97 pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
98 for (i = 0; i < PTRS_PER_PTE; i++)
99 set_pte(pte + i, empty_pte);
100 return pte;
101}
102
103/*
104 * Add a physical memory range to the 1:1 mapping.
105 */
106static int vmem_add_range(unsigned long start, unsigned long size)
107{
108 unsigned long address;
109 pgd_t *pg_dir;
110 pmd_t *pm_dir;
111 pte_t *pt_dir;
112 pte_t pte;
113 int ret = -ENOMEM;
114
115 for (address = start; address < start + size; address += PAGE_SIZE) {
116 pg_dir = pgd_offset_k(address);
117 if (pgd_none(*pg_dir)) {
118 pm_dir = vmem_pmd_alloc();
119 if (!pm_dir)
120 goto out;
121 pgd_populate(&init_mm, pg_dir, pm_dir);
122 }
123
124 pm_dir = pmd_offset(pg_dir, address);
125 if (pmd_none(*pm_dir)) {
126 pt_dir = vmem_pte_alloc();
127 if (!pt_dir)
128 goto out;
129 pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
130 }
131
132 pt_dir = pte_offset_kernel(pm_dir, address);
133 pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
134 set_pte(pt_dir, pte);
135 }
136 ret = 0;
137out:
138 flush_tlb_kernel_range(start, start + size);
139 return ret;
140}
141
142/*
143 * Remove a physical memory range from the 1:1 mapping.
144 * Currently only invalidates page table entries.
145 */
146static void vmem_remove_range(unsigned long start, unsigned long size)
147{
148 unsigned long address;
149 pgd_t *pg_dir;
150 pmd_t *pm_dir;
151 pte_t *pt_dir;
152 pte_t pte;
153
154 pte_val(pte) = _PAGE_TYPE_EMPTY;
155 for (address = start; address < start + size; address += PAGE_SIZE) {
156 pg_dir = pgd_offset_k(address);
157 if (pgd_none(*pg_dir))
158 continue;
159 pm_dir = pmd_offset(pg_dir, address);
160 if (pmd_none(*pm_dir))
161 continue;
162 pt_dir = pte_offset_kernel(pm_dir, address);
163 set_pte(pt_dir, pte);
164 }
165 flush_tlb_kernel_range(start, start + size);
166}
167
168/*
169 * Add a backed mem_map array to the virtual mem_map array.
170 */
171static int vmem_add_mem_map(unsigned long start, unsigned long size)
172{
173 unsigned long address, start_addr, end_addr;
174 struct page *map_start, *map_end;
175 pgd_t *pg_dir;
176 pmd_t *pm_dir;
177 pte_t *pt_dir;
178 pte_t pte;
179 int ret = -ENOMEM;
180
181 map_start = vmem_map + PFN_DOWN(start);
182 map_end = vmem_map + PFN_DOWN(start + size);
183
184 start_addr = (unsigned long) map_start & PAGE_MASK;
185 end_addr = PFN_ALIGN((unsigned long) map_end);
186
187 for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
188 pg_dir = pgd_offset_k(address);
189 if (pgd_none(*pg_dir)) {
190 pm_dir = vmem_pmd_alloc();
191 if (!pm_dir)
192 goto out;
193 pgd_populate(&init_mm, pg_dir, pm_dir);
194 }
195
196 pm_dir = pmd_offset(pg_dir, address);
197 if (pmd_none(*pm_dir)) {
198 pt_dir = vmem_pte_alloc();
199 if (!pt_dir)
200 goto out;
201 pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
202 }
203
204 pt_dir = pte_offset_kernel(pm_dir, address);
205 if (pte_none(*pt_dir)) {
206 unsigned long new_page;
207
208 new_page =__pa(vmem_alloc_pages(0));
209 if (!new_page)
210 goto out;
211 pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
212 set_pte(pt_dir, pte);
213 }
214 }
215 ret = 0;
216out:
217 flush_tlb_kernel_range(start_addr, end_addr);
218 return ret;
219}
220
221static int vmem_add_mem(unsigned long start, unsigned long size)
222{
223 int ret;
224
225 ret = vmem_add_range(start, size);
226 if (ret)
227 return ret;
228 return vmem_add_mem_map(start, size);
229}
230
231/*
232 * Add memory segment to the segment list if it doesn't overlap with
233 * an already present segment.
234 */
235static int insert_memory_segment(struct memory_segment *seg)
236{
237 struct memory_segment *tmp;
238
239 if (PFN_DOWN(seg->start + seg->size) > max_pfn ||
240 seg->start + seg->size < seg->start)
241 return -ERANGE;
242
243 list_for_each_entry(tmp, &mem_segs, list) {
244 if (seg->start >= tmp->start + tmp->size)
245 continue;
246 if (seg->start + seg->size <= tmp->start)
247 continue;
248 return -ENOSPC;
249 }
250 list_add(&seg->list, &mem_segs);
251 return 0;
252}
253
254/*
255 * Remove memory segment from the segment list.
256 */
257static void remove_memory_segment(struct memory_segment *seg)
258{
259 list_del(&seg->list);
260}
261
262static void __remove_shared_memory(struct memory_segment *seg)
263{
264 remove_memory_segment(seg);
265 vmem_remove_range(seg->start, seg->size);
266}
267
268int remove_shared_memory(unsigned long start, unsigned long size)
269{
270 struct memory_segment *seg;
271 int ret;
272
273 mutex_lock(&vmem_mutex);
274
275 ret = -ENOENT;
276 list_for_each_entry(seg, &mem_segs, list) {
277 if (seg->start == start && seg->size == size)
278 break;
279 }
280
281 if (seg->start != start || seg->size != size)
282 goto out;
283
284 ret = 0;
285 __remove_shared_memory(seg);
286 kfree(seg);
287out:
288 mutex_unlock(&vmem_mutex);
289 return ret;
290}
291
292int add_shared_memory(unsigned long start, unsigned long size)
293{
294 struct memory_segment *seg;
295 struct page *page;
296 unsigned long pfn, num_pfn, end_pfn;
297 int ret;
298
299 mutex_lock(&vmem_mutex);
300 ret = -ENOMEM;
301 seg = kzalloc(sizeof(*seg), GFP_KERNEL);
302 if (!seg)
303 goto out;
304 seg->start = start;
305 seg->size = size;
306
307 ret = insert_memory_segment(seg);
308 if (ret)
309 goto out_free;
310
311 ret = vmem_add_mem(start, size);
312 if (ret)
313 goto out_remove;
314
315 pfn = PFN_DOWN(start);
316 num_pfn = PFN_DOWN(size);
317 end_pfn = pfn + num_pfn;
318
319 page = pfn_to_page(pfn);
320 memset(page, 0, num_pfn * sizeof(struct page));
321
322 for (; pfn < end_pfn; pfn++) {
323 page = pfn_to_page(pfn);
324 init_page_count(page);
325 reset_page_mapcount(page);
326 SetPageReserved(page);
327 INIT_LIST_HEAD(&page->lru);
328 }
329 goto out;
330
331out_remove:
332 __remove_shared_memory(seg);
333out_free:
334 kfree(seg);
335out:
336 mutex_unlock(&vmem_mutex);
337 return ret;
338}
339
340/*
341 * map whole physical memory to virtual memory (identity mapping)
342 */
343void __init vmem_map_init(void)
344{
345 unsigned long map_size;
346 int i;
347
348 map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page);
349 vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size);
350 vmem_map = (struct page *) vmalloc_end;
351 NODE_DATA(0)->node_mem_map = vmem_map;
352
353 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
354 vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
355}
356
357/*
358 * Convert memory chunk array to a memory segment list so there is a single
359 * list that contains both r/w memory and shared memory segments.
360 */
361static int __init vmem_convert_memory_chunk(void)
362{
363 struct memory_segment *seg;
364 int i;
365
366 mutex_lock(&vmem_mutex);
367 for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
368 if (!memory_chunk[i].size)
369 continue;
370 seg = kzalloc(sizeof(*seg), GFP_KERNEL);
371 if (!seg)
372 panic("Out of memory...\n");
373 seg->start = memory_chunk[i].addr;
374 seg->size = memory_chunk[i].size;
375 insert_memory_segment(seg);
376 }
377 mutex_unlock(&vmem_mutex);
378 return 0;
379}
380
381core_initcall(vmem_convert_memory_chunk);
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index 363ea761d5ee..05ea6f172786 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -127,6 +127,26 @@ page_get_storage_key(unsigned long addr)
127 return skey; 127 return skey;
128} 128}
129 129
130extern unsigned long max_pfn;
131
132static inline int pfn_valid(unsigned long pfn)
133{
134 unsigned long dummy;
135 int ccode;
136
137 if (pfn >= max_pfn)
138 return 0;
139
140 asm volatile(
141 " lra %0,0(%2)\n"
142 " ipm %1\n"
143 " srl %1,28\n"
144 : "=d" (dummy), "=d" (ccode)
145 : "a" (pfn << PAGE_SHIFT)
146 : "cc");
147 return !ccode;
148}
149
130#endif /* !__ASSEMBLY__ */ 150#endif /* !__ASSEMBLY__ */
131 151
132/* to align the pointer to the (next) page boundary */ 152/* to align the pointer to the (next) page boundary */
@@ -138,8 +158,6 @@ page_get_storage_key(unsigned long addr)
138#define __va(x) (void *)(unsigned long)(x) 158#define __va(x) (void *)(unsigned long)(x)
139#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) 159#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
140#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) 160#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
141
142#define pfn_valid(pfn) ((pfn) < max_mapnr)
143#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 161#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
144 162
145#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ 163#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 28619de5ecae..0707a7e2fc16 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -25,8 +25,11 @@ extern void diag10(unsigned long addr);
25 * Page allocation orders. 25 * Page allocation orders.
26 */ 26 */
27#ifndef __s390x__ 27#ifndef __s390x__
28# define PTE_ALLOC_ORDER 0
29# define PMD_ALLOC_ORDER 0
28# define PGD_ALLOC_ORDER 1 30# define PGD_ALLOC_ORDER 1
29#else /* __s390x__ */ 31#else /* __s390x__ */
32# define PTE_ALLOC_ORDER 0
30# define PMD_ALLOC_ORDER 2 33# define PMD_ALLOC_ORDER 2
31# define PGD_ALLOC_ORDER 2 34# define PGD_ALLOC_ORDER 2
32#endif /* __s390x__ */ 35#endif /* __s390x__ */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 2d968a69ed1f..ae61aca5d483 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -107,23 +107,25 @@ extern char empty_zero_page[PAGE_SIZE];
107 * The vmalloc() routines leaves a hole of 4kB between each vmalloced 107 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
108 * area for the same reason. ;) 108 * area for the same reason. ;)
109 */ 109 */
110extern unsigned long vmalloc_end;
110#define VMALLOC_OFFSET (8*1024*1024) 111#define VMALLOC_OFFSET (8*1024*1024)
111#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ 112#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \
112 & ~(VMALLOC_OFFSET-1)) 113 & ~(VMALLOC_OFFSET-1))
114#define VMALLOC_END vmalloc_end
113 115
114/* 116/*
115 * We need some free virtual space to be able to do vmalloc. 117 * We need some free virtual space to be able to do vmalloc.
116 * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc 118 * VMALLOC_MIN_SIZE defines the minimum size of the vmalloc
117 * area. On a machine with 2GB memory we make sure that we 119 * area. On a machine with 2GB memory we make sure that we
118 * have at least 128MB free space for vmalloc. On a machine 120 * have at least 128MB free space for vmalloc. On a machine
119 * with 4TB we make sure we have at least 1GB. 121 * with 4TB we make sure we have at least 128GB.
120 */ 122 */
121#ifndef __s390x__ 123#ifndef __s390x__
122#define VMALLOC_MIN_SIZE 0x8000000UL 124#define VMALLOC_MIN_SIZE 0x8000000UL
123#define VMALLOC_END 0x80000000UL 125#define VMALLOC_END_INIT 0x80000000UL
124#else /* __s390x__ */ 126#else /* __s390x__ */
125#define VMALLOC_MIN_SIZE 0x40000000UL 127#define VMALLOC_MIN_SIZE 0x2000000000UL
126#define VMALLOC_END 0x40000000000UL 128#define VMALLOC_END_INIT 0x40000000000UL
127#endif /* __s390x__ */ 129#endif /* __s390x__ */
128 130
129/* 131/*
@@ -815,11 +817,17 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
815 817
816#define kern_addr_valid(addr) (1) 818#define kern_addr_valid(addr) (1)
817 819
820extern int add_shared_memory(unsigned long start, unsigned long size);
821extern int remove_shared_memory(unsigned long start, unsigned long size);
822
818/* 823/*
819 * No page table caches to initialise 824 * No page table caches to initialise
820 */ 825 */
821#define pgtable_cache_init() do { } while (0) 826#define pgtable_cache_init() do { } while (0)
822 827
828#define __HAVE_ARCH_MEMMAP_INIT
829extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
830
823#define __HAVE_ARCH_PTEP_ESTABLISH 831#define __HAVE_ARCH_PTEP_ESTABLISH
824#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 832#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
825#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 833#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG