 include/asm-generic/memory_model.h |   6 +
 include/linux/mm.h                 |   6 +
 mm/Makefile                        |   1 +
 mm/sparse-vmemmap.c                | 181 ++++++++++++++++++++++++++++++++
 mm/sparse.c                        |  21 ++++++--
 5 files changed, 211 insertions(+), 4 deletions(-)
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 30d8d33491dd..52226e14bd7d 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -46,6 +46,12 @@
 	__pgdat->node_start_pfn;				\
 })
 
+#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/* memmap is virtually contiguous. */
+#define __pfn_to_page(pfn)	(vmemmap + (pfn))
+#define __page_to_pfn(page)	((page) - vmemmap)
+
 #elif defined(CONFIG_SPARSEMEM)
 /*
  * Note: section's mem_map is encoded to reflect its start_pfn.
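
With CONFIG_SPARSEMEM_VMEMMAP both conversions collapse to pointer arithmetic against one virtually contiguous array, with no section-table lookup on the way. A minimal editorial sketch of what the macros amount to, assuming kernel context where struct page is defined; the base address is illustrative only, since each architecture reserves its own virtual range:

    /* Sketch only: the real vmemmap base is chosen per architecture. */
    #define VMEMMAP_BASE	((struct page *)0xffffe20000000000UL)
    #define vmemmap		VMEMMAP_BASE

    static inline struct page *sketch_pfn_to_page(unsigned long pfn)
    {
            return vmemmap + pfn;	/* one add, no memory access */
    }

    static inline unsigned long sketch_page_to_pfn(struct page *page)
    {
            return page - vmemmap;	/* pointer difference */
    }

Contrast the CONFIG_SPARSEMEM branch in the surrounding context, which must first fetch the section's encoded mem_map pointer before it can do the same subtraction.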
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1692dd6cb915..d216abbd0574 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1218,5 +1218,11 @@ extern int randomize_va_space;
 
 const char * arch_vma_name(struct vm_area_struct *vma);
 
+struct page *sparse_early_mem_map_populate(unsigned long pnum, int nid);
+int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
+int vmemmap_populate_pmd(pud_t *, unsigned long, unsigned long, int);
+void *vmemmap_alloc_block(unsigned long size, int node);
+void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
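
These declarations are the interface the rest of the kernel sees; the remaining helpers in mm/sparse-vmemmap.c stay static. An editorial sketch of the call hierarchy they imply, as implemented later in this patch:

    /*
     * sparse_early_mem_map_populate(pnum, nid)
     *   -> vmemmap_populate(start_page, PAGES_PER_SECTION, nid)
     *        -> walks the pgd/pud levels, then vmemmap_populate_pmd()
     *             -> fills ptes, allocating backing pages with
     *                vmemmap_alloc_block() and cross-checking node
     *                locality with vmemmap_verify()
     */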
diff --git a/mm/Makefile b/mm/Makefile
index 245e33ab00c4..d28f63e05b46 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
+obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SHMEM)	+= shmem.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
new file mode 100644
index 000000000000..7bb7a4b96d74
--- /dev/null
+++ b/mm/sparse-vmemmap.c
@@ -0,0 +1,181 @@
+/*
+ * Virtual Memory Map support
+ *
+ * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ *
+ * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
+ * virt_to_page, page_address() to be implemented as a base offset
+ * calculation without memory access.
+ *
+ * However, virtual mappings need a page table and TLBs. Many Linux
+ * architectures already map their physical space using 1-1 mappings
+ * via TLBs. For those arches the virtual memory map is essentially
+ * for free if we use the same page size as the 1-1 mappings. In that
+ * case the overhead consists of a few additional pages that are
+ * allocated to create a view of memory for vmemmap.
+ *
+ * Special Kconfig settings:
+ *
+ * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
+ *
+ *        The architecture has its own functions to populate the memory
+ *        map and provides a vmemmap_populate function.
+ *
+ * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
+ *
+ *        The architecture provides functions to populate the pmd level
+ *        of the vmemmap mappings, allowing mappings that use large pages
+ *        where available.
+ *
+ * If neither is set then PAGE_SIZE mappings are generated which
+ * require one PTE/TLB per PAGE_SIZE chunk of the virtual memory map.
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <asm/dma.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+/*
+ * Allocate a block of memory to be used to back the virtual memory map
+ * or to back the page tables that are used to create the mapping.
+ * Uses the main allocators if they are available, else bootmem.
+ */
+void * __meminit vmemmap_alloc_block(unsigned long size, int node)
+{
+        /* If the main allocator is up use that, fallback to bootmem. */
+        if (slab_is_available()) {
+                struct page *page = alloc_pages_node(node,
+                                GFP_KERNEL | __GFP_ZERO, get_order(size));
+                if (page)
+                        return page_address(page);
+                return NULL;
+        } else
+                return __alloc_bootmem_node(NODE_DATA(node), size, size,
+                                __pa(MAX_DMA_ADDRESS));
+}
+
+#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
+void __meminit vmemmap_verify(pte_t *pte, int node,
+                                unsigned long start, unsigned long end)
+{
+        unsigned long pfn = pte_pfn(*pte);
+        int actual_node = early_pfn_to_nid(pfn);
+
+        if (actual_node != node)
+                printk(KERN_WARNING "[%lx-%lx] potential offnode "
+                        "page_structs\n", start, end - 1);
+}
+
+#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
+static int __meminit vmemmap_populate_pte(pmd_t *pmd, unsigned long addr,
+                                        unsigned long end, int node)
+{
+        pte_t *pte;
+
+        for (pte = pte_offset_kernel(pmd, addr); addr < end;
+                        pte++, addr += PAGE_SIZE)
+                if (pte_none(*pte)) {
+                        pte_t entry;
+                        void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+                        if (!p)
+                                return -ENOMEM;
+
+                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+                        set_pte(pte, entry);
+
+                } else
+                        vmemmap_verify(pte, node, addr + PAGE_SIZE, end);
+
+        return 0;
+}
+
+int __meminit vmemmap_populate_pmd(pud_t *pud, unsigned long addr,
+                                        unsigned long end, int node)
+{
+        pmd_t *pmd;
+        int error = 0;
+        unsigned long next;
+
+        for (pmd = pmd_offset(pud, addr); addr < end && !error;
+                        pmd++, addr = next) {
+                if (pmd_none(*pmd)) {
+                        void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+                        if (!p)
+                                return -ENOMEM;
+
+                        pmd_populate_kernel(&init_mm, pmd, p);
+                } else
+                        vmemmap_verify((pte_t *)pmd, node,
+                                        pmd_addr_end(addr, end), end);
+                next = pmd_addr_end(addr, end);
+                error = vmemmap_populate_pte(pmd, addr, next, node);
+        }
+        return error;
+}
+#endif /* !CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD */
+
+static int __meminit vmemmap_populate_pud(pgd_t *pgd, unsigned long addr,
+                                        unsigned long end, int node)
+{
+        pud_t *pud;
+        int error = 0;
+        unsigned long next;
+
+        for (pud = pud_offset(pgd, addr); addr < end && !error;
+                        pud++, addr = next) {
+                if (pud_none(*pud)) {
+                        void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+                        if (!p)
+                                return -ENOMEM;
+
+                        pud_populate(&init_mm, pud, p);
+                }
+                next = pud_addr_end(addr, end);
+                error = vmemmap_populate_pmd(pud, addr, next, node);
+        }
+        return error;
+}
+
+int __meminit vmemmap_populate(struct page *start_page,
+                                unsigned long nr, int node)
+{
+        pgd_t *pgd;
+        unsigned long addr = (unsigned long)start_page;
+        unsigned long end = (unsigned long)(start_page + nr);
+        unsigned long next;
+        int error = 0;
+
+        printk(KERN_DEBUG "[%lx-%lx] Virtual memory section"
+                " (%ld pages) node %d\n", addr, end - 1, nr, node);
+
+        for (pgd = pgd_offset_k(addr); addr < end && !error;
+                        pgd++, addr = next) {
+                if (pgd_none(*pgd)) {
+                        void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+                        if (!p)
+                                return -ENOMEM;
+
+                        pgd_populate(&init_mm, pgd, p);
+                }
+                next = pgd_addr_end(addr, end);
+                error = vmemmap_populate_pud(pgd, addr, next, node);
+        }
+        return error;
+}
+#endif /* !CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP */
+
+struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid)
+{
+        struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
+        int error = vmemmap_populate(map, PAGES_PER_SECTION, nid);
+        if (error)
+                return NULL;
+
+        return map;
+}
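
The generic walker above always ends in PAGE_SIZE ptes. An architecture that selects CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD supplies its own vmemmap_populate_pmd() and can install large-page mappings instead, so one TLB entry covers what would otherwise take 512 ptes (with 4K pages and 2M PMDs). A hedged sketch of such a hook; pfn_pmd() and PAGE_KERNEL_LARGE follow x86 naming and are assumptions here, not part of this patch, and addr/end are assumed PMD-aligned:

    /* Editorial sketch, not part of this patch: map the memmap with
     * large pages at the pmd level. */
    int __meminit vmemmap_populate_pmd(pud_t *pud, unsigned long addr,
                                            unsigned long end, int node)
    {
            pmd_t *pmd;

            for (pmd = pmd_offset(pud, addr); addr < end;
                            pmd++, addr += PMD_SIZE) {
                    if (pmd_none(*pmd)) {
                            /* One naturally aligned 2M block backs the map. */
                            void *p = vmemmap_alloc_block(PMD_SIZE, node);

                            if (!p)
                                    return -ENOMEM;
                            set_pmd(pmd, pfn_pmd(__pa(p) >> PAGE_SHIFT,
                                            PAGE_KERNEL_LARGE));
                    }
            }
            return 0;
    }

Because vmemmap_alloc_block() already falls back to bootmem before the slab allocator is up, such a hook works both at early boot and when sections are populated later.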
diff --git a/mm/sparse.c b/mm/sparse.c
index 54f3940406cb..52843a76feed 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -9,6 +9,8 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 /*
  * Permanent SPARSEMEM data:
@@ -222,11 +224,10 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
 	return NULL;
 }
 
-static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid)
 {
 	struct page *map;
-	struct mem_section *ms = __nr_to_section(pnum);
-	int nid = sparse_early_nid(ms);
 
 	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 	if (map)
@@ -239,10 +240,22 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 
 	map = alloc_bootmem_node(NODE_DATA(nid),
 			sizeof(struct page) * PAGES_PER_SECTION);
+	return map;
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+
+struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+{
+	struct page *map;
+	struct mem_section *ms = __nr_to_section(pnum);
+	int nid = sparse_early_nid(ms);
+
+	map = sparse_early_mem_map_populate(pnum, nid);
 	if (map)
 		return map;
 
-	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
+	printk(KERN_ERR "%s: sparsemem memory map backing failed, "
+		"some memory will not be available.\n", __FUNCTION__);
 	ms->section_mem_map = 0;
 	return NULL;
 }
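
After this change sparse_early_mem_map_alloc() no longer knows how the map gets its backing: it resolves the node and delegates, and Kconfig picks which sparse_early_mem_map_populate() is linked in. A condensed editorial view of the resulting dispatch:

    /*
     * CONFIG_SPARSEMEM_VMEMMAP=y: mm/sparse-vmemmap.c builds page tables
     *   so that (vmemmap + pfn) is a valid struct page pointer.
     * CONFIG_SPARSEMEM_VMEMMAP=n: mm/sparse.c allocates a flat per-section
     *   array of struct page via alloc_remap()/bootmem, as before.
     * In both cases a failure now logs at KERN_ERR and clears the
     * section's mem_map encoding, so the section is not treated as valid.
     */
    struct page *map = sparse_early_mem_map_alloc(pnum);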