aboutsummaryrefslogtreecommitdiffstats
path: root/mm/sparse-vmemmap.c
blob: 7bb7a4b96d74cb712a188c8d45d6c2cf447210f0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * Special Kconfig settings:
 *
 * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
 *
 * 	The architecture has its own functions to populate the memory
 * 	map and provides a vmemmap_populate function.
 *
 * CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
 *
 * 	The architecture provides functions to populate the pmd level
 * 	of the vmemmap mappings.  Allowing mappings using large pages
 * 	where available.
 *
 * 	If neither is set then PAGE_SIZE mappings are generated which
 * 	require one PTE/TLB per PAGE_SIZE chunk of the virtual memory map.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Obtain a block of memory that will back either the virtual memory map
 * itself or the page tables used to map it.
 *
 * @size: number of bytes wanted; on the bootmem path it is also passed
 *        as the alignment
 * @node: node the memory should be taken from
 *
 * When slab_is_available() reports true, the block is requested from the
 * page allocator with GFP_KERNEL | __GFP_ZERO at order get_order(size),
 * and NULL is returned if that allocation fails. Otherwise the request is
 * forwarded to the bootmem allocator and its result is returned as is.
 */
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	struct page *page;

	/* Main allocator not up yet: fall back to bootmem. */
	if (!slab_is_available())
		return __alloc_bootmem_node(NODE_DATA(node), size, size,
				__pa(MAX_DMA_ADDRESS));

	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO,
				get_order(size));

	return page ? page_address(page) : NULL;
}

#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP
/*
 * Emit a warning when the page frame referenced by @pte is not on the
 * node the caller expected.
 *
 * @pte:   entry whose page frame number is looked up
 * @node:  node the backing page is expected to belong to
 * @start: first address of the range named in the warning
 * @end:   end of that range; the message prints end - 1
 *
 * The node of the page frame is obtained through early_pfn_to_nid().
 * Nothing is returned; a mismatch only produces a KERN_WARNING message.
 */
void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	if (early_pfn_to_nid(pte_pfn(*pte)) == node)
		return;

	printk(KERN_WARNING "[%lx-%lx] potential offnode "
		"page_structs\n", start, end - 1);
}

#ifndef CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD
/*
 * Populate the PTE level of the vmemmap for [addr, end).
 *
 * @pmd:  pmd entry whose PTE table covers the range
 * @addr: first virtual address to map
 * @end:  end of the range (exclusive)
 * @node: node to allocate backing pages from
 *
 * Walks the range one PAGE_SIZE step at a time. An empty PTE gets a
 * freshly allocated page mapped with PAGE_KERNEL; an already populated
 * PTE is handed to vmemmap_verify() so an off-node backing page is
 * reported.
 *
 * Returns 0 on success or -ENOMEM if a backing page could not be
 * allocated. Entries installed before the failure are left in place.
 */
static int __meminit vmemmap_populate_pte(pmd_t *pmd, unsigned long addr,
					unsigned long end, int node)
{
	pte_t *pte;

	for (pte = pte_offset_kernel(pmd, addr); addr < end;
						pte++, addr += PAGE_SIZE) {
		if (pte_none(*pte)) {
			pte_t entry;
			void *p = vmemmap_alloc_block(PAGE_SIZE, node);

			if (!p)
				return -ENOMEM;

			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
			set_pte(pte, entry);
		} else {
			/*
			 * Name the page this PTE maps in the warning, i.e.
			 * [addr, addr + PAGE_SIZE), rather than the rest of
			 * the range that follows it.
			 */
			vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
		}
	}

	return 0;
}

/*
 * Populate the PMD level of the vmemmap for [addr, end) and descend into
 * the PTE level for each pmd-sized piece.
 *
 * @pud:  pud entry whose PMD table covers the range
 * @addr: first virtual address to map
 * @end:  end of the range (exclusive)
 * @node: node to allocate page tables and backing pages from
 *
 * An empty pmd gets a newly allocated PTE table. A populated pmd is
 * passed to vmemmap_verify() so an off-node page is reported.
 * NOTE(review): the pmd is cast to pte_t * for that check, which
 * presumably relies on the two entry layouts being compatible on the
 * architectures using this generic code - confirm.
 *
 * Returns 0 on success, -ENOMEM if a PTE table could not be allocated,
 * or the error returned by vmemmap_populate_pte(). Nothing already
 * installed is undone on failure.
 */
int __meminit vmemmap_populate_pmd(pud_t *pud, unsigned long addr,
						unsigned long end, int node)
{
	pmd_t *pmd;
	int error = 0;
	unsigned long next;

	for (pmd = pmd_offset(pud, addr); addr < end && !error;
						pmd++, addr = next) {
		/* End of the span covered by this pmd, clamped to end. */
		next = pmd_addr_end(addr, end);

		if (pmd_none(*pmd)) {
			void *p = vmemmap_alloc_block(PAGE_SIZE, node);

			if (!p)
				return -ENOMEM;

			pmd_populate_kernel(&init_mm, pmd, p);
		} else {
			/*
			 * Report the span this pmd covers, [addr, next),
			 * not the remainder of the range after it.
			 */
			vmemmap_verify((pte_t *)pmd, node, addr, next);
		}

		error = vmemmap_populate_pte(pmd, addr, next, node);
	}
	return error;
}
#endif /* CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP_PMD */

/*
 * Populate the PUD level of the vmemmap for [addr, end) and descend into
 * the PMD level for each pud-sized piece.
 *
 * @pgd:  pgd entry whose PUD table covers the range
 * @addr: first virtual address to map
 * @end:  end of the range (exclusive)
 * @node: node to allocate page tables from
 *
 * Returns 0 on success, -ENOMEM if a PMD table could not be allocated,
 * or the error returned by vmemmap_populate_pmd().
 */
static int __meminit vmemmap_populate_pud(pgd_t *pgd, unsigned long addr,
						unsigned long end, int node)
{
	pud_t *pud = pud_offset(pgd, addr);

	while (addr < end) {
		/* End of the span covered by this pud, clamped to end. */
		unsigned long next = pud_addr_end(addr, end);
		int error;

		if (pud_none(*pud)) {
			void *table = vmemmap_alloc_block(PAGE_SIZE, node);

			if (!table)
				return -ENOMEM;

			pud_populate(&init_mm, pud, table);
		}

		error = vmemmap_populate_pmd(pud, addr, next, node);
		if (error)
			return error;

		pud++;
		addr = next;
	}

	return 0;
}

/*
 * Create the kernel page-table mappings that back a run of struct page
 * entries in the virtual memory map.
 *
 * @start_page: first struct page of the run; its address is the first
 *              virtual address to map
 * @nr:         number of struct page entries in the run
 * @node:       node to allocate page tables and backing pages from
 *
 * Logs the range at KERN_DEBUG level, then walks the kernel pgd entries
 * covering it, allocating a PUD table wherever a pgd is empty and
 * descending through vmemmap_populate_pud().
 *
 * Returns 0 on success, -ENOMEM if a PUD table could not be allocated,
 * or the error returned by vmemmap_populate_pud().
 */
int __meminit vmemmap_populate(struct page *start_page,
						unsigned long nr, int node)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + nr);
	pgd_t *pgd;

	printk(KERN_DEBUG "[%lx-%lx] Virtual memory section"
		" (%ld pages) node %d\n", addr, end - 1, nr, node);

	pgd = pgd_offset_k(addr);
	while (addr < end) {
		/* End of the span covered by this pgd, clamped to end. */
		unsigned long next = pgd_addr_end(addr, end);
		int error;

		if (pgd_none(*pgd)) {
			void *table = vmemmap_alloc_block(PAGE_SIZE, node);

			if (!table)
				return -ENOMEM;

			pgd_populate(&init_mm, pgd, table);
		}

		error = vmemmap_populate_pud(pgd, addr, next, node);
		if (error)
			return error;

		pgd++;
		addr = next;
	}

	return 0;
}
#endif /* !CONFIG_ARCH_POPULATES_SPARSEMEM_VMEMMAP */

/*
 * Set up the virtual memory map for one memory section.
 *
 * @pnum: section number; the section's first pfn is
 *        pnum * PAGES_PER_SECTION
 * @nid:  node passed on to vmemmap_populate()
 *
 * Returns the struct page pointer for the section's first pfn, or NULL
 * if vmemmap_populate() reported an error.
 */
struct page __init *sparse_early_mem_map_populate(unsigned long pnum, int nid)
{
	struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);

	if (vmemmap_populate(map, PAGES_PER_SECTION, nid))
		return NULL;

	return map;
}