aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mm/hugetlb.c172
1 files changed, 145 insertions, 27 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 72acbb29d2cc..65616941a383 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -43,6 +43,16 @@ static DEFINE_SPINLOCK(hugetlb_lock);
43/* 43/*
44 * Region tracking -- allows tracking of reservations and instantiated pages 44 * Region tracking -- allows tracking of reservations and instantiated pages
45 * across the pages in a mapping. 45 * across the pages in a mapping.
46 *
47 * The region data structures are protected by a combination of the mmap_sem
48 * and the hugetlb_instantiation_mutex. To access or modify a region the caller
49 * must either hold the mmap_sem for write, or the mmap_sem for read and
50 * the hugetlb_instantiation_mutex:
51 *
52 * down_write(&mm->mmap_sem);
53 * or
54 * down_read(&mm->mmap_sem);
55 * mutex_lock(&hugetlb_instantiation_mutex);
46 */ 56 */
47struct file_region { 57struct file_region {
48 struct list_head link; 58 struct list_head link;
@@ -165,6 +175,30 @@ static long region_truncate(struct list_head *head, long end)
165 return chg; 175 return chg;
166} 176}
167 177
178static long region_count(struct list_head *head, long f, long t)
179{
180 struct file_region *rg;
181 long chg = 0;
182
183 /* Locate each segment we overlap with, and count that overlap. */
184 list_for_each_entry(rg, head, link) {
185 int seg_from;
186 int seg_to;
187
188 if (rg->to <= f)
189 continue;
190 if (rg->from >= t)
191 break;
192
193 seg_from = max(rg->from, f);
194 seg_to = min(rg->to, t);
195
196 chg += seg_to - seg_from;
197 }
198
199 return chg;
200}
201
168/* 202/*
169 * Convert the address within this vma to the page offset within 203 * Convert the address within this vma to the page offset within
170 * the mapping, in base page units. 204 * the mapping, in base page units.
@@ -187,9 +221,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
187 (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); 221 (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
188} 222}
189 223
190#define HPAGE_RESV_OWNER (1UL << (BITS_PER_LONG - 1)) 224/*
191#define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2)) 225 * Flags for MAP_PRIVATE reservations. These are stored in the bottom
226 * bits of the reservation map pointer, which are always clear due to
227 * alignment.
228 */
229#define HPAGE_RESV_OWNER (1UL << 0)
230#define HPAGE_RESV_UNMAPPED (1UL << 1)
192#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED) 231#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
232
193/* 233/*
194 * These helpers are used to track how many pages are reserved for 234 * These helpers are used to track how many pages are reserved for
195 * faults in a MAP_PRIVATE mapping. Only the process that called mmap() 235 * faults in a MAP_PRIVATE mapping. Only the process that called mmap()
@@ -199,6 +239,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma,
199 * the reserve counters are updated with the hugetlb_lock held. It is safe 239 * the reserve counters are updated with the hugetlb_lock held. It is safe
200 * to reset the VMA at fork() time as it is not in use yet and there is no 240 * to reset the VMA at fork() time as it is not in use yet and there is no
201 * chance of the global counters getting corrupted as a result of the values. 241 * chance of the global counters getting corrupted as a result of the values.
242 *
243 * The private mapping reservation is represented in a subtly different
244 * manner to a shared mapping. A shared mapping has a region map associated
245 * with the underlying file; this region map represents the backing file
246 * pages which have ever had a reservation assigned, and this persists even
247 * after the page is instantiated. A private mapping has a region map
248 * associated with the original mmap which is attached to all VMAs which
249 * reference it; this region map represents those offsets which have consumed
250 * reservation, i.e. where pages have been instantiated.
202 */ 251 */
203static unsigned long get_vma_private_data(struct vm_area_struct *vma) 252static unsigned long get_vma_private_data(struct vm_area_struct *vma)
204{ 253{
@@ -211,22 +260,48 @@ static void set_vma_private_data(struct vm_area_struct *vma,
211 vma->vm_private_data = (void *)value; 260 vma->vm_private_data = (void *)value;
212} 261}
213 262
214static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma) 263struct resv_map {
264 struct kref refs;
265 struct list_head regions;
266};
267
268struct resv_map *resv_map_alloc(void)
269{
270 struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
271 if (!resv_map)
272 return NULL;
273
274 kref_init(&resv_map->refs);
275 INIT_LIST_HEAD(&resv_map->regions);
276
277 return resv_map;
278}
279
280void resv_map_release(struct kref *ref)
281{
282 struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
283
284 /* Clear out any active regions before we release the map. */
285 region_truncate(&resv_map->regions, 0);
286 kfree(resv_map);
287}
288
289static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
215{ 290{
216 VM_BUG_ON(!is_vm_hugetlb_page(vma)); 291 VM_BUG_ON(!is_vm_hugetlb_page(vma));
217 if (!(vma->vm_flags & VM_SHARED)) 292 if (!(vma->vm_flags & VM_SHARED))
218 return get_vma_private_data(vma) & ~HPAGE_RESV_MASK; 293 return (struct resv_map *)(get_vma_private_data(vma) &
294 ~HPAGE_RESV_MASK);
219 return 0; 295 return 0;
220} 296}
221 297
222static void set_vma_resv_huge_pages(struct vm_area_struct *vma, 298static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
223 unsigned long reserve)
224{ 299{
225 VM_BUG_ON(!is_vm_hugetlb_page(vma)); 300 VM_BUG_ON(!is_vm_hugetlb_page(vma));
226 VM_BUG_ON(vma->vm_flags & VM_SHARED); 301 VM_BUG_ON(vma->vm_flags & VM_SHARED);
227 302
228 set_vma_private_data(vma, 303 set_vma_private_data(vma, (get_vma_private_data(vma) &
229 (get_vma_private_data(vma) & HPAGE_RESV_MASK) | reserve); 304 HPAGE_RESV_MASK) | (unsigned long)map);
230} 305}
231 306
232static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) 307static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
@@ -253,19 +328,12 @@ static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
253 if (vma->vm_flags & VM_SHARED) { 328 if (vma->vm_flags & VM_SHARED) {
254 /* Shared mappings always use reserves */ 329 /* Shared mappings always use reserves */
255 resv_huge_pages--; 330 resv_huge_pages--;
256 } else { 331 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
257 /* 332 /*
258 * Only the process that called mmap() has reserves for 333 * Only the process that called mmap() has reserves for
259 * private mappings. 334 * private mappings.
260 */ 335 */
261 if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { 336 resv_huge_pages--;
262 unsigned long flags, reserve;
263 resv_huge_pages--;
264 flags = (unsigned long)vma->vm_private_data &
265 HPAGE_RESV_MASK;
266 reserve = (unsigned long)vma->vm_private_data - 1;
267 vma->vm_private_data = (void *)(reserve | flags);
268 }
269 } 337 }
270} 338}
271 339
@@ -282,7 +350,7 @@ static int vma_has_private_reserves(struct vm_area_struct *vma)
282{ 350{
283 if (vma->vm_flags & VM_SHARED) 351 if (vma->vm_flags & VM_SHARED)
284 return 0; 352 return 0;
285 if (!vma_resv_huge_pages(vma)) 353 if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
286 return 0; 354 return 0;
287 return 1; 355 return 1;
288} 356}
@@ -742,12 +810,19 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
742 return region_chg(&inode->i_mapping->private_list, 810 return region_chg(&inode->i_mapping->private_list,
743 idx, idx + 1); 811 idx, idx + 1);
744 812
745 } else { 813 } else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
746 if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) 814 return 1;
747 return 1;
748 }
749 815
750 return 0; 816 } else {
817 int err;
818 pgoff_t idx = vma_pagecache_offset(vma, addr);
819 struct resv_map *reservations = vma_resv_map(vma);
820
821 err = region_chg(&reservations->regions, idx, idx + 1);
822 if (err < 0)
823 return err;
824 return 0;
825 }
751} 826}
752static void vma_commit_reservation(struct vm_area_struct *vma, 827static void vma_commit_reservation(struct vm_area_struct *vma,
753 unsigned long addr) 828 unsigned long addr)
@@ -758,6 +833,13 @@ static void vma_commit_reservation(struct vm_area_struct *vma,
758 if (vma->vm_flags & VM_SHARED) { 833 if (vma->vm_flags & VM_SHARED) {
759 pgoff_t idx = vma_pagecache_offset(vma, addr); 834 pgoff_t idx = vma_pagecache_offset(vma, addr);
760 region_add(&inode->i_mapping->private_list, idx, idx + 1); 835 region_add(&inode->i_mapping->private_list, idx, idx + 1);
836
837 } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
838 pgoff_t idx = vma_pagecache_offset(vma, addr);
839 struct resv_map *reservations = vma_resv_map(vma);
840
841 /* Mark this page used in the map. */
842 region_add(&reservations->regions, idx, idx + 1);
761 } 843 }
762} 844}
763 845
@@ -1047,11 +1129,41 @@ out:
1047 return ret; 1129 return ret;
1048} 1130}
1049 1131
1132static void hugetlb_vm_op_open(struct vm_area_struct *vma)
1133{
1134 struct resv_map *reservations = vma_resv_map(vma);
1135
1136 /*
1137 * This new VMA should share its sibling's reservation map if present.
1138 * The VMA will only ever have a valid reservation map pointer where
1139 * it is being copied from another still existing VMA. As that VMA
1140 * has a reference to the reservation map it cannot disappear until
1141 * after this open call completes. It is therefore safe to take a
1142 * new reference here without additional locking.
1143 */
1144 if (reservations)
1145 kref_get(&reservations->refs);
1146}
1147
1050static void hugetlb_vm_op_close(struct vm_area_struct *vma) 1148static void hugetlb_vm_op_close(struct vm_area_struct *vma)
1051{ 1149{
1052 unsigned long reserve = vma_resv_huge_pages(vma); 1150 struct resv_map *reservations = vma_resv_map(vma);
1053 if (reserve) 1151 unsigned long reserve;
1054 hugetlb_acct_memory(-reserve); 1152 unsigned long start;
1153 unsigned long end;
1154
1155 if (reservations) {
1156 start = vma_pagecache_offset(vma, vma->vm_start);
1157 end = vma_pagecache_offset(vma, vma->vm_end);
1158
1159 reserve = (end - start) -
1160 region_count(&reservations->regions, start, end);
1161
1162 kref_put(&reservations->refs, resv_map_release);
1163
1164 if (reserve)
1165 hugetlb_acct_memory(-reserve);
1166 }
1055} 1167}
1056 1168
1057/* 1169/*
@@ -1068,6 +1180,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1068 1180
1069struct vm_operations_struct hugetlb_vm_ops = { 1181struct vm_operations_struct hugetlb_vm_ops = {
1070 .fault = hugetlb_vm_op_fault, 1182 .fault = hugetlb_vm_op_fault,
1183 .open = hugetlb_vm_op_open,
1071 .close = hugetlb_vm_op_close, 1184 .close = hugetlb_vm_op_close,
1072}; 1185};
1073 1186
@@ -1617,8 +1730,13 @@ int hugetlb_reserve_pages(struct inode *inode,
1617 if (!vma || vma->vm_flags & VM_SHARED) 1730 if (!vma || vma->vm_flags & VM_SHARED)
1618 chg = region_chg(&inode->i_mapping->private_list, from, to); 1731 chg = region_chg(&inode->i_mapping->private_list, from, to);
1619 else { 1732 else {
1733 struct resv_map *resv_map = resv_map_alloc();
1734 if (!resv_map)
1735 return -ENOMEM;
1736
1620 chg = to - from; 1737 chg = to - from;
1621 set_vma_resv_huge_pages(vma, chg); 1738
1739 set_vma_resv_map(vma, resv_map);
1622 set_vma_resv_flags(vma, HPAGE_RESV_OWNER); 1740 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
1623 } 1741 }
1624 1742