Diffstat (limited to 'mm/hugetlb.c')
 mm/hugetlb.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 126 insertions(+), 10 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d5987a87bbe5..27fad5d9bcf6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,7 +22,7 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages;
+static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages;
 unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
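Reviewer's note, not part of the patch: reserved_huge_pages counts hugepages that are still on the free lists but are already promised to a hugetlbfs inode. Like nr_huge_pages and free_huge_pages, it is only read and written under hugetlb_lock. A minimal userspace model of the accounting invariant the rest of the patch maintains (struct and function names here are illustrative, not kernel code):

#include <assert.h>

/* Illustrative model of the three pool counters and the invariant
 * that holds whenever hugetlb_lock is dropped. */
struct hugepage_pool {
        unsigned long nr;       /* nr_huge_pages: total pool size */
        unsigned long free;     /* free_huge_pages: not yet handed out */
        unsigned long reserved; /* reserved_huge_pages: free but promised */
};

static void check_pool_invariant(const struct hugepage_pool *p)
{
        assert(p->reserved <= p->free); /* reserves come from free pages */
        assert(p->free <= p->nr);       /* free pages belong to the pool */
}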
@@ -120,17 +120,136 @@ void free_huge_page(struct page *page)
 
 struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
+        struct inode *inode = vma->vm_file->f_dentry->d_inode;
         struct page *page;
+        int use_reserve = 0;
+        unsigned long idx;
 
         spin_lock(&hugetlb_lock);
-        page = dequeue_huge_page(vma, addr);
-        if (!page) {
-                spin_unlock(&hugetlb_lock);
-                return NULL;
-        }
+
+        if (vma->vm_flags & VM_MAYSHARE) {
+
+                /* idx = radix tree index, i.e. offset into file in
+                 * HPAGE_SIZE units */
+                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+                /* The hugetlbfs specific inode info stores the number
+                 * of "guaranteed available" (huge) pages.  That is,
+                 * the first 'prereserved_hpages' pages of the inode
+                 * are either already instantiated, or have been
+                 * pre-reserved (by hugetlb_extend_reservation()).  Here
+                 * we're in the process of instantiating the page, so
+                 * we use this to determine whether to draw from the
+                 * pre-reserved pool or the truly free pool. */
+                if (idx < HUGETLBFS_I(inode)->prereserved_hpages)
+                        use_reserve = 1;
+        }
+
+        if (!use_reserve) {
+                if (free_huge_pages <= reserved_huge_pages)
+                        goto fail;
+        } else {
+                BUG_ON(reserved_huge_pages == 0);
+                reserved_huge_pages--;
+        }
+
+        page = dequeue_huge_page(vma, addr);
+        if (!page)
+                goto fail;
+
         spin_unlock(&hugetlb_lock);
         set_page_refcounted(page);
         return page;
+
+fail:
+        WARN_ON(use_reserve);   /* reserved allocations shouldn't fail */
+        spin_unlock(&hugetlb_lock);
+        return NULL;
+}
+
+/* hugetlb_extend_reservation()
+ *
+ * Ensure that at least 'atleast' hugepages are, and will remain,
+ * available to instantiate the first 'atleast' pages of the given
+ * inode.  If the inode doesn't already have this many pages reserved
+ * or instantiated, set aside some hugepages in the reserved pool to
+ * satisfy later faults (or fail now if there aren't enough, rather
+ * than getting the SIGBUS later).
+ */
+int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
+                               unsigned long atleast)
+{
+        struct inode *inode = &info->vfs_inode;
+        unsigned long change_in_reserve = 0;
+        int ret = 0;
+
+        spin_lock(&hugetlb_lock);
+        read_lock_irq(&inode->i_mapping->tree_lock);
+
+        if (info->prereserved_hpages >= atleast)
+                goto out;
+
+        /* Because we always call this on shared mappings, none of the
+         * pages beyond info->prereserved_hpages can have been
+         * instantiated, so we need to reserve all of them now. */
+        change_in_reserve = atleast - info->prereserved_hpages;
+
+        if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        reserved_huge_pages += change_in_reserve;
+        info->prereserved_hpages = atleast;
+
+out:
+        read_unlock_irq(&inode->i_mapping->tree_lock);
+        spin_unlock(&hugetlb_lock);
+
+        return ret;
+}
+
+/* hugetlb_truncate_reservation()
+ *
+ * This returns pages reserved for the given inode to the general free
+ * hugepage pool.  If the inode has any pages prereserved, but not
+ * instantiated, beyond offset (atmost << HPAGE_SHIFT), then release
+ * them.
+ */
+void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
+                                  unsigned long atmost)
+{
+        struct inode *inode = &info->vfs_inode;
+        struct address_space *mapping = inode->i_mapping;
+        unsigned long idx;
+        unsigned long change_in_reserve = 0;
+        struct page *page;
+
+        spin_lock(&hugetlb_lock);
+        read_lock_irq(&inode->i_mapping->tree_lock);
+
+        if (info->prereserved_hpages <= atmost)
+                goto out;
+
+        /* Count pages which were reserved, but not instantiated, and
+         * which we can now release. */
+        for (idx = atmost; idx < info->prereserved_hpages; idx++) {
+                page = radix_tree_lookup(&mapping->page_tree, idx);
+                if (!page)
+                        /* Pages which are already instantiated can't
+                         * be unreserved (and in fact have already
+                         * been removed from the reserved pool) */
+                        change_in_reserve++;
+        }
+
+        BUG_ON(reserved_huge_pages < change_in_reserve);
+        reserved_huge_pages -= change_in_reserve;
+        info->prereserved_hpages = atmost;
+
+out:
+        read_unlock_irq(&inode->i_mapping->tree_lock);
+        spin_unlock(&hugetlb_lock);
 }
 
 static int __init hugetlb_init(void)
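Reviewer's note, not part of this diff: the expected caller of hugetlb_extend_reservation() is hugetlbfs itself, at mmap() time on shared mappings, so that a mapping either gets its hugepages promised up front or fails cleanly. The companion fs/hugetlbfs hunks are not shown here, so the following mmap hook is only a sketch of how such a call might look; the function name and error handling are assumptions:

/* Sketch only, not the actual fs/hugetlbfs/inode.c change. */
static int example_hugetlbfs_file_mmap(struct file *file,
                                       struct vm_area_struct *vma)
{
        struct inode *inode = file->f_dentry->d_inode;
        unsigned long len = vma->vm_end - vma->vm_start;
        /* First hugepage index past the end of the mapping, in the
         * same HPAGE_SIZE units that prereserved_hpages uses. */
        unsigned long atleast = (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT))
                                + (len >> HPAGE_SHIFT);

        if (!(vma->vm_flags & VM_MAYSHARE))
                return 0;       /* private mappings take their chances */

        /* Reserve now, or fail with -ENOMEM instead of SIGBUS later. */
        return hugetlb_extend_reservation(HUGETLBFS_I(inode), atleast);
}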
@@ -238,9 +357,11 @@ int hugetlb_report_meminfo(char *buf)
         return sprintf(buf,
                 "HugePages_Total: %5lu\n"
                 "HugePages_Free:  %5lu\n"
+                "HugePages_Rsvd:  %5lu\n"
                 "Hugepagesize:    %5lu kB\n",
                 nr_huge_pages,
                 free_huge_pages,
+                reserved_huge_pages,
                 HPAGE_SIZE/1024);
 }
 
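Reviewer's note, not part of this diff: with the new HugePages_Rsvd line, userspace can see how much of the free pool is already spoken for; the pages available to faults holding no reservation are HugePages_Free minus HugePages_Rsvd, mirroring the free_huge_pages <= reserved_huge_pages check in alloc_huge_page(). A small illustrative helper that derives this from /proc/meminfo:

#include <stdio.h>

/* Hugepages still available to allocations holding no reservation,
 * i.e. Free - Rsvd, or -1 if the fields cannot be read. */
static long hugepages_unreserved(void)
{
        FILE *f = fopen("/proc/meminfo", "r");
        char line[128];
        long free = -1, rsvd = -1;

        if (!f)
                return -1;
        while (fgets(line, sizeof(line), f)) {
                sscanf(line, "HugePages_Free: %ld", &free);
                sscanf(line, "HugePages_Rsvd: %ld", &rsvd);
        }
        fclose(f);
        return (free < 0 || rsvd < 0) ? -1 : free - rsvd;
}

int main(void)
{
        printf("unreserved hugepages: %ld\n", hugepages_unreserved());
        return 0;
}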
@@ -253,11 +374,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
                 nid, free_huge_pages_node[nid]);
 }
 
-int is_hugepage_mem_enough(size_t size)
-{
-        return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
-}
-
 /* Return the number pages of memory we physically have, in PAGE_SIZE units. */
 unsigned long hugetlb_total_pages(void)
 {
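Reviewer's note, not part of this diff: removing is_hugepage_mem_enough() is the point of the series; the advisory "probably enough" heuristic is replaced by a hard guarantee taken at reservation time. Assuming the companion hugetlbfs change calls hugetlb_extend_reservation() from its mmap hook, userspace sees a clean ENOMEM from mmap() on shortage instead of a SIGBUS at first touch. A test of that behaviour (the mount point and 2MB hugepage size are assumptions):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define HPAGE_SIZE      (2UL * 1024 * 1024)     /* assumed hugepage size */
#define NR_HPAGES       4

int main(void)
{
        /* Assumes a hugetlbfs mount at /mnt/huge. */
        int fd = open("/mnt/huge/rsvd-test", O_CREAT | O_RDWR, 0600);
        char *p;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* With strict reservation, the pages are promised here: on
         * shortage, mmap() fails with ENOMEM rather than the first
         * touch below dying with SIGBUS. */
        p = mmap(NULL, NR_HPAGES * HPAGE_SIZE, PROT_READ | PROT_WRITE,
                 MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        p[0] = 1;       /* safe: this page was reserved at mmap() time */
        munmap(p, NR_HPAGES * HPAGE_SIZE);
        close(fd);
        unlink("/mnt/huge/rsvd-test");
        return 0;
}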