diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 260 |
1 files changed, 260 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c new file mode 100644 index 000000000000..4eb5ae3fbe10 --- /dev/null +++ b/mm/hugetlb.c | |||
@@ -0,0 +1,260 @@ | |||
1 | /* | ||
2 | * Generic hugetlb support. | ||
3 | * (C) William Irwin, April 2004 | ||
4 | */ | ||
5 | #include <linux/gfp.h> | ||
6 | #include <linux/list.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/hugetlb.h> | ||
11 | #include <linux/sysctl.h> | ||
12 | #include <linux/highmem.h> | ||
13 | #include <linux/nodemask.h> | ||
14 | |||
15 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | ||
16 | static unsigned long nr_huge_pages, free_huge_pages; | ||
17 | unsigned long max_huge_pages; | ||
18 | static struct list_head hugepage_freelists[MAX_NUMNODES]; | ||
19 | static unsigned int nr_huge_pages_node[MAX_NUMNODES]; | ||
20 | static unsigned int free_huge_pages_node[MAX_NUMNODES]; | ||
21 | static DEFINE_SPINLOCK(hugetlb_lock); | ||
22 | |||
23 | static void enqueue_huge_page(struct page *page) | ||
24 | { | ||
25 | int nid = page_to_nid(page); | ||
26 | list_add(&page->lru, &hugepage_freelists[nid]); | ||
27 | free_huge_pages++; | ||
28 | free_huge_pages_node[nid]++; | ||
29 | } | ||
30 | |||
31 | static struct page *dequeue_huge_page(void) | ||
32 | { | ||
33 | int nid = numa_node_id(); | ||
34 | struct page *page = NULL; | ||
35 | |||
36 | if (list_empty(&hugepage_freelists[nid])) { | ||
37 | for (nid = 0; nid < MAX_NUMNODES; ++nid) | ||
38 | if (!list_empty(&hugepage_freelists[nid])) | ||
39 | break; | ||
40 | } | ||
41 | if (nid >= 0 && nid < MAX_NUMNODES && | ||
42 | !list_empty(&hugepage_freelists[nid])) { | ||
43 | page = list_entry(hugepage_freelists[nid].next, | ||
44 | struct page, lru); | ||
45 | list_del(&page->lru); | ||
46 | free_huge_pages--; | ||
47 | free_huge_pages_node[nid]--; | ||
48 | } | ||
49 | return page; | ||
50 | } | ||
51 | |||
52 | static struct page *alloc_fresh_huge_page(void) | ||
53 | { | ||
54 | static int nid = 0; | ||
55 | struct page *page; | ||
56 | page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, | ||
57 | HUGETLB_PAGE_ORDER); | ||
58 | nid = (nid + 1) % num_online_nodes(); | ||
59 | if (page) { | ||
60 | nr_huge_pages++; | ||
61 | nr_huge_pages_node[page_to_nid(page)]++; | ||
62 | } | ||
63 | return page; | ||
64 | } | ||
65 | |||
66 | void free_huge_page(struct page *page) | ||
67 | { | ||
68 | BUG_ON(page_count(page)); | ||
69 | |||
70 | INIT_LIST_HEAD(&page->lru); | ||
71 | page[1].mapping = NULL; | ||
72 | |||
73 | spin_lock(&hugetlb_lock); | ||
74 | enqueue_huge_page(page); | ||
75 | spin_unlock(&hugetlb_lock); | ||
76 | } | ||
77 | |||
78 | struct page *alloc_huge_page(void) | ||
79 | { | ||
80 | struct page *page; | ||
81 | int i; | ||
82 | |||
83 | spin_lock(&hugetlb_lock); | ||
84 | page = dequeue_huge_page(); | ||
85 | if (!page) { | ||
86 | spin_unlock(&hugetlb_lock); | ||
87 | return NULL; | ||
88 | } | ||
89 | spin_unlock(&hugetlb_lock); | ||
90 | set_page_count(page, 1); | ||
91 | page[1].mapping = (void *)free_huge_page; | ||
92 | for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i) | ||
93 | clear_highpage(&page[i]); | ||
94 | return page; | ||
95 | } | ||
96 | |||
97 | static int __init hugetlb_init(void) | ||
98 | { | ||
99 | unsigned long i; | ||
100 | struct page *page; | ||
101 | |||
102 | for (i = 0; i < MAX_NUMNODES; ++i) | ||
103 | INIT_LIST_HEAD(&hugepage_freelists[i]); | ||
104 | |||
105 | for (i = 0; i < max_huge_pages; ++i) { | ||
106 | page = alloc_fresh_huge_page(); | ||
107 | if (!page) | ||
108 | break; | ||
109 | spin_lock(&hugetlb_lock); | ||
110 | enqueue_huge_page(page); | ||
111 | spin_unlock(&hugetlb_lock); | ||
112 | } | ||
113 | max_huge_pages = free_huge_pages = nr_huge_pages = i; | ||
114 | printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages); | ||
115 | return 0; | ||
116 | } | ||
117 | module_init(hugetlb_init); | ||
118 | |||
119 | static int __init hugetlb_setup(char *s) | ||
120 | { | ||
121 | if (sscanf(s, "%lu", &max_huge_pages) <= 0) | ||
122 | max_huge_pages = 0; | ||
123 | return 1; | ||
124 | } | ||
125 | __setup("hugepages=", hugetlb_setup); | ||
126 | |||
127 | #ifdef CONFIG_SYSCTL | ||
128 | static void update_and_free_page(struct page *page) | ||
129 | { | ||
130 | int i; | ||
131 | nr_huge_pages--; | ||
132 | nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; | ||
133 | for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { | ||
134 | page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | | ||
135 | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | | ||
136 | 1 << PG_private | 1<< PG_writeback); | ||
137 | set_page_count(&page[i], 0); | ||
138 | } | ||
139 | set_page_count(page, 1); | ||
140 | __free_pages(page, HUGETLB_PAGE_ORDER); | ||
141 | } | ||
142 | |||
#ifdef CONFIG_HIGHMEM
/*
 * Shrink the pool towards @count by releasing free huge pages that are
 * NOT in highmem, leaving highmem huge pages on the free lists.
 *
 * Walks every node's free list and releases each non-highmem page until
 * nr_huge_pages has dropped to @count or the lists are exhausted.  The
 * target is checked only after a page has been released, so the caller
 * must invoke this only while nr_huge_pages > @count.
 *
 * Called with hugetlb_lock held.
 */
static void try_to_free_low(unsigned long count)
{
	int i, nid;

	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;

		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			/*
			 * Look the node up before the page goes back to the
			 * allocator; do not inspect the page afterwards.
			 */
			nid = page_to_nid(page);
			list_del(&page->lru);
			update_and_free_page(page);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
/* Without highmem there is no low/high distinction: nothing to do. */
static inline void try_to_free_low(unsigned long count)
{
}
#endif
167 | |||
168 | static unsigned long set_max_huge_pages(unsigned long count) | ||
169 | { | ||
170 | while (count > nr_huge_pages) { | ||
171 | struct page *page = alloc_fresh_huge_page(); | ||
172 | if (!page) | ||
173 | return nr_huge_pages; | ||
174 | spin_lock(&hugetlb_lock); | ||
175 | enqueue_huge_page(page); | ||
176 | spin_unlock(&hugetlb_lock); | ||
177 | } | ||
178 | if (count >= nr_huge_pages) | ||
179 | return nr_huge_pages; | ||
180 | |||
181 | spin_lock(&hugetlb_lock); | ||
182 | try_to_free_low(count); | ||
183 | while (count < nr_huge_pages) { | ||
184 | struct page *page = dequeue_huge_page(); | ||
185 | if (!page) | ||
186 | break; | ||
187 | update_and_free_page(page); | ||
188 | } | ||
189 | spin_unlock(&hugetlb_lock); | ||
190 | return nr_huge_pages; | ||
191 | } | ||
192 | |||
193 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | ||
194 | struct file *file, void __user *buffer, | ||
195 | size_t *length, loff_t *ppos) | ||
196 | { | ||
197 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | ||
198 | max_huge_pages = set_max_huge_pages(max_huge_pages); | ||
199 | return 0; | ||
200 | } | ||
201 | #endif /* CONFIG_SYSCTL */ | ||
202 | |||
203 | int hugetlb_report_meminfo(char *buf) | ||
204 | { | ||
205 | return sprintf(buf, | ||
206 | "HugePages_Total: %5lu\n" | ||
207 | "HugePages_Free: %5lu\n" | ||
208 | "Hugepagesize: %5lu kB\n", | ||
209 | nr_huge_pages, | ||
210 | free_huge_pages, | ||
211 | HPAGE_SIZE/1024); | ||
212 | } | ||
213 | |||
214 | int hugetlb_report_node_meminfo(int nid, char *buf) | ||
215 | { | ||
216 | return sprintf(buf, | ||
217 | "Node %d HugePages_Total: %5u\n" | ||
218 | "Node %d HugePages_Free: %5u\n", | ||
219 | nid, nr_huge_pages_node[nid], | ||
220 | nid, free_huge_pages_node[nid]); | ||
221 | } | ||
222 | |||
223 | int is_hugepage_mem_enough(size_t size) | ||
224 | { | ||
225 | return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages; | ||
226 | } | ||
227 | |||
228 | /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ | ||
229 | unsigned long hugetlb_total_pages(void) | ||
230 | { | ||
231 | return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); | ||
232 | } | ||
233 | EXPORT_SYMBOL(hugetlb_total_pages); | ||
234 | |||
235 | /* | ||
236 | * We cannot handle pagefaults against hugetlb pages at all. They cause | ||
237 | * handle_mm_fault() to try to instantiate regular-sized pages in the | ||
238 | * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get | ||
239 | * this far. | ||
240 | */ | ||
241 | static struct page *hugetlb_nopage(struct vm_area_struct *vma, | ||
242 | unsigned long address, int *unused) | ||
243 | { | ||
244 | BUG(); | ||
245 | return NULL; | ||
246 | } | ||
247 | |||
248 | struct vm_operations_struct hugetlb_vm_ops = { | ||
249 | .nopage = hugetlb_nopage, | ||
250 | }; | ||
251 | |||
252 | void zap_hugepage_range(struct vm_area_struct *vma, | ||
253 | unsigned long start, unsigned long length) | ||
254 | { | ||
255 | struct mm_struct *mm = vma->vm_mm; | ||
256 | |||
257 | spin_lock(&mm->page_table_lock); | ||
258 | unmap_hugepage_range(vma, start, start + length); | ||
259 | spin_unlock(&mm->page_table_lock); | ||
260 | } | ||