diff options
-rw-r--r-- | mm/hugetlb.c | 172 |
1 files changed, 145 insertions, 27 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 72acbb29d2cc..65616941a383 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -43,6 +43,16 @@ static DEFINE_SPINLOCK(hugetlb_lock); | |||
43 | /* | 43 | /* |
44 | * Region tracking -- allows tracking of reservations and instantiated pages | 44 | * Region tracking -- allows tracking of reservations and instantiated pages |
45 | * across the pages in a mapping. | 45 | * across the pages in a mapping. |
46 | * | ||
47 | * The region data structures are protected by a combination of the mmap_sem | ||
48 | * and the hugetlb_instantiation_mutex. To access or modify a region the caller | ||
49 | * must either hold the mmap_sem for write, or the mmap_sem for read and | ||
50 | * the hugetlb_instantiation_mutex: | ||
51 | * | ||
52 | * down_write(&mm->mmap_sem); | ||
53 | * or | ||
54 | * down_read(&mm->mmap_sem); | ||
55 | * mutex_lock(&hugetlb_instantiation_mutex); | ||
46 | */ | 56 | */ |
47 | struct file_region { | 57 | struct file_region { |
48 | struct list_head link; | 58 | struct list_head link; |
@@ -165,6 +175,30 @@ static long region_truncate(struct list_head *head, long end) | |||
165 | return chg; | 175 | return chg; |
166 | } | 176 | } |
167 | 177 | ||
178 | static long region_count(struct list_head *head, long f, long t) | ||
179 | { | ||
180 | struct file_region *rg; | ||
181 | long chg = 0; | ||
182 | |||
183 | /* Locate each segment we overlap with, and count that overlap. */ | ||
184 | list_for_each_entry(rg, head, link) { | ||
185 | int seg_from; | ||
186 | int seg_to; | ||
187 | |||
188 | if (rg->to <= f) | ||
189 | continue; | ||
190 | if (rg->from >= t) | ||
191 | break; | ||
192 | |||
193 | seg_from = max(rg->from, f); | ||
194 | seg_to = min(rg->to, t); | ||
195 | |||
196 | chg += seg_to - seg_from; | ||
197 | } | ||
198 | |||
199 | return chg; | ||
200 | } | ||
201 | |||
168 | /* | 202 | /* |
169 | * Convert the address within this vma to the page offset within | 203 | * Convert the address within this vma to the page offset within |
170 | * the mapping, in base page units. | 204 | * the mapping, in base page units. |
@@ -187,9 +221,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma, | |||
187 | (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); | 221 | (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); |
188 | } | 222 | } |
189 | 223 | ||
190 | #define HPAGE_RESV_OWNER (1UL << (BITS_PER_LONG - 1)) | 224 | /* |
191 | #define HPAGE_RESV_UNMAPPED (1UL << (BITS_PER_LONG - 2)) | 225 | * Flags for MAP_PRIVATE reservations. These are stored in the bottom |
226 | * bits of the reservation map pointer, which are always clear due to | ||
227 | * alignment. | ||
228 | */ | ||
229 | #define HPAGE_RESV_OWNER (1UL << 0) | ||
230 | #define HPAGE_RESV_UNMAPPED (1UL << 1) | ||
192 | #define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED) | 231 | #define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED) |
232 | |||
193 | /* | 233 | /* |
194 | * These helpers are used to track how many pages are reserved for | 234 | * These helpers are used to track how many pages are reserved for |
195 | * faults in a MAP_PRIVATE mapping. Only the process that called mmap() | 235 | * faults in a MAP_PRIVATE mapping. Only the process that called mmap() |
@@ -199,6 +239,15 @@ static pgoff_t vma_pagecache_offset(struct vm_area_struct *vma, | |||
199 | * the reserve counters are updated with the hugetlb_lock held. It is safe | 239 | * the reserve counters are updated with the hugetlb_lock held. It is safe |
200 | * to reset the VMA at fork() time as it is not in use yet and there is no | 240 | * to reset the VMA at fork() time as it is not in use yet and there is no |
201 | * chance of the global counters getting corrupted as a result of the values. | 241 | * chance of the global counters getting corrupted as a result of the values. |
242 | * | ||
243 | * The private mapping reservation is represented in a subtly different | ||
244 | * manner to a shared mapping. A shared mapping has a region map associated | ||
245 | * with the underlying file; this region map represents the backing file | ||
246 | * pages which have ever had a reservation assigned, and this persists even | ||
247 | * after the page is instantiated. A private mapping has a region map | ||
248 | * associated with the original mmap which is attached to all VMAs which | ||
249 | * reference it; this region map represents those offsets which have consumed | ||
250 | * reservation, i.e. where pages have been instantiated. | ||
202 | */ | 251 | */ |
203 | static unsigned long get_vma_private_data(struct vm_area_struct *vma) | 252 | static unsigned long get_vma_private_data(struct vm_area_struct *vma) |
204 | { | 253 | { |
@@ -211,22 +260,48 @@ static void set_vma_private_data(struct vm_area_struct *vma, | |||
211 | vma->vm_private_data = (void *)value; | 260 | vma->vm_private_data = (void *)value; |
212 | } | 261 | } |
213 | 262 | ||
214 | static unsigned long vma_resv_huge_pages(struct vm_area_struct *vma) | 263 | struct resv_map { |
264 | struct kref refs; | ||
265 | struct list_head regions; | ||
266 | }; | ||
267 | |||
268 | struct resv_map *resv_map_alloc(void) | ||
269 | { | ||
270 | struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL); | ||
271 | if (!resv_map) | ||
272 | return NULL; | ||
273 | |||
274 | kref_init(&resv_map->refs); | ||
275 | INIT_LIST_HEAD(&resv_map->regions); | ||
276 | |||
277 | return resv_map; | ||
278 | } | ||
279 | |||
280 | void resv_map_release(struct kref *ref) | ||
281 | { | ||
282 | struct resv_map *resv_map = container_of(ref, struct resv_map, refs); | ||
283 | |||
284 | /* Clear out any active regions before we release the map. */ | ||
285 | region_truncate(&resv_map->regions, 0); | ||
286 | kfree(resv_map); | ||
287 | } | ||
288 | |||
289 | static struct resv_map *vma_resv_map(struct vm_area_struct *vma) | ||
215 | { | 290 | { |
216 | VM_BUG_ON(!is_vm_hugetlb_page(vma)); | 291 | VM_BUG_ON(!is_vm_hugetlb_page(vma)); |
217 | if (!(vma->vm_flags & VM_SHARED)) | 292 | if (!(vma->vm_flags & VM_SHARED)) |
218 | return get_vma_private_data(vma) & ~HPAGE_RESV_MASK; | 293 | return (struct resv_map *)(get_vma_private_data(vma) & |
294 | ~HPAGE_RESV_MASK); | ||
219 | return 0; | 295 | return 0; |
220 | } | 296 | } |
221 | 297 | ||
222 | static void set_vma_resv_huge_pages(struct vm_area_struct *vma, | 298 | static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) |
223 | unsigned long reserve) | ||
224 | { | 299 | { |
225 | VM_BUG_ON(!is_vm_hugetlb_page(vma)); | 300 | VM_BUG_ON(!is_vm_hugetlb_page(vma)); |
226 | VM_BUG_ON(vma->vm_flags & VM_SHARED); | 301 | VM_BUG_ON(vma->vm_flags & VM_SHARED); |
227 | 302 | ||
228 | set_vma_private_data(vma, | 303 | set_vma_private_data(vma, (get_vma_private_data(vma) & |
229 | (get_vma_private_data(vma) & HPAGE_RESV_MASK) | reserve); | 304 | HPAGE_RESV_MASK) | (unsigned long)map); |
230 | } | 305 | } |
231 | 306 | ||
232 | static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) | 307 | static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) |
@@ -253,19 +328,12 @@ static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) | |||
253 | if (vma->vm_flags & VM_SHARED) { | 328 | if (vma->vm_flags & VM_SHARED) { |
254 | /* Shared mappings always use reserves */ | 329 | /* Shared mappings always use reserves */ |
255 | resv_huge_pages--; | 330 | resv_huge_pages--; |
256 | } else { | 331 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { |
257 | /* | 332 | /* |
258 | * Only the process that called mmap() has reserves for | 333 | * Only the process that called mmap() has reserves for |
259 | * private mappings. | 334 | * private mappings. |
260 | */ | 335 | */ |
261 | if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | 336 | resv_huge_pages--; |
262 | unsigned long flags, reserve; | ||
263 | resv_huge_pages--; | ||
264 | flags = (unsigned long)vma->vm_private_data & | ||
265 | HPAGE_RESV_MASK; | ||
266 | reserve = (unsigned long)vma->vm_private_data - 1; | ||
267 | vma->vm_private_data = (void *)(reserve | flags); | ||
268 | } | ||
269 | } | 337 | } |
270 | } | 338 | } |
271 | 339 | ||
@@ -282,7 +350,7 @@ static int vma_has_private_reserves(struct vm_area_struct *vma) | |||
282 | { | 350 | { |
283 | if (vma->vm_flags & VM_SHARED) | 351 | if (vma->vm_flags & VM_SHARED) |
284 | return 0; | 352 | return 0; |
285 | if (!vma_resv_huge_pages(vma)) | 353 | if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) |
286 | return 0; | 354 | return 0; |
287 | return 1; | 355 | return 1; |
288 | } | 356 | } |
@@ -742,12 +810,19 @@ static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr) | |||
742 | return region_chg(&inode->i_mapping->private_list, | 810 | return region_chg(&inode->i_mapping->private_list, |
743 | idx, idx + 1); | 811 | idx, idx + 1); |
744 | 812 | ||
745 | } else { | 813 | } else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { |
746 | if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) | 814 | return 1; |
747 | return 1; | ||
748 | } | ||
749 | 815 | ||
750 | return 0; | 816 | } else { |
817 | int err; | ||
818 | pgoff_t idx = vma_pagecache_offset(vma, addr); | ||
819 | struct resv_map *reservations = vma_resv_map(vma); | ||
820 | |||
821 | err = region_chg(&reservations->regions, idx, idx + 1); | ||
822 | if (err < 0) | ||
823 | return err; | ||
824 | return 0; | ||
825 | } | ||
751 | } | 826 | } |
752 | static void vma_commit_reservation(struct vm_area_struct *vma, | 827 | static void vma_commit_reservation(struct vm_area_struct *vma, |
753 | unsigned long addr) | 828 | unsigned long addr) |
@@ -758,6 +833,13 @@ static void vma_commit_reservation(struct vm_area_struct *vma, | |||
758 | if (vma->vm_flags & VM_SHARED) { | 833 | if (vma->vm_flags & VM_SHARED) { |
759 | pgoff_t idx = vma_pagecache_offset(vma, addr); | 834 | pgoff_t idx = vma_pagecache_offset(vma, addr); |
760 | region_add(&inode->i_mapping->private_list, idx, idx + 1); | 835 | region_add(&inode->i_mapping->private_list, idx, idx + 1); |
836 | |||
837 | } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { | ||
838 | pgoff_t idx = vma_pagecache_offset(vma, addr); | ||
839 | struct resv_map *reservations = vma_resv_map(vma); | ||
840 | |||
841 | /* Mark this page used in the map. */ | ||
842 | region_add(&reservations->regions, idx, idx + 1); | ||
761 | } | 843 | } |
762 | } | 844 | } |
763 | 845 | ||
@@ -1047,11 +1129,41 @@ out: | |||
1047 | return ret; | 1129 | return ret; |
1048 | } | 1130 | } |
1049 | 1131 | ||
1132 | static void hugetlb_vm_op_open(struct vm_area_struct *vma) | ||
1133 | { | ||
1134 | struct resv_map *reservations = vma_resv_map(vma); | ||
1135 | |||
1136 | /* | ||
1137 | * This new VMA should share its sibling's reservation map if present. | ||
1138 | * The VMA will only ever have a valid reservation map pointer where | ||
1139 | * it is being copied from another still existing VMA. As that VMA | ||
1140 | * has a reference to the reservation map it cannot disappear until | ||
1141 | * after this open call completes. It is therefore safe to take a | ||
1142 | * new reference here without additional locking. | ||
1143 | */ | ||
1144 | if (reservations) | ||
1145 | kref_get(&reservations->refs); | ||
1146 | } | ||
1147 | |||
1050 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) | 1148 | static void hugetlb_vm_op_close(struct vm_area_struct *vma) |
1051 | { | 1149 | { |
1052 | unsigned long reserve = vma_resv_huge_pages(vma); | 1150 | struct resv_map *reservations = vma_resv_map(vma); |
1053 | if (reserve) | 1151 | unsigned long reserve; |
1054 | hugetlb_acct_memory(-reserve); | 1152 | unsigned long start; |
1153 | unsigned long end; | ||
1154 | |||
1155 | if (reservations) { | ||
1156 | start = vma_pagecache_offset(vma, vma->vm_start); | ||
1157 | end = vma_pagecache_offset(vma, vma->vm_end); | ||
1158 | |||
1159 | reserve = (end - start) - | ||
1160 | region_count(&reservations->regions, start, end); | ||
1161 | |||
1162 | kref_put(&reservations->refs, resv_map_release); | ||
1163 | |||
1164 | if (reserve) | ||
1165 | hugetlb_acct_memory(-reserve); | ||
1166 | } | ||
1055 | } | 1167 | } |
1056 | 1168 | ||
1057 | /* | 1169 | /* |
@@ -1068,6 +1180,7 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1068 | 1180 | ||
1069 | struct vm_operations_struct hugetlb_vm_ops = { | 1181 | struct vm_operations_struct hugetlb_vm_ops = { |
1070 | .fault = hugetlb_vm_op_fault, | 1182 | .fault = hugetlb_vm_op_fault, |
1183 | .open = hugetlb_vm_op_open, | ||
1071 | .close = hugetlb_vm_op_close, | 1184 | .close = hugetlb_vm_op_close, |
1072 | }; | 1185 | }; |
1073 | 1186 | ||
@@ -1617,8 +1730,13 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
1617 | if (!vma || vma->vm_flags & VM_SHARED) | 1730 | if (!vma || vma->vm_flags & VM_SHARED) |
1618 | chg = region_chg(&inode->i_mapping->private_list, from, to); | 1731 | chg = region_chg(&inode->i_mapping->private_list, from, to); |
1619 | else { | 1732 | else { |
1733 | struct resv_map *resv_map = resv_map_alloc(); | ||
1734 | if (!resv_map) | ||
1735 | return -ENOMEM; | ||
1736 | |||
1620 | chg = to - from; | 1737 | chg = to - from; |
1621 | set_vma_resv_huge_pages(vma, chg); | 1738 | |
1739 | set_vma_resv_map(vma, resv_map); | ||
1622 | set_vma_resv_flags(vma, HPAGE_RESV_OWNER); | 1740 | set_vma_resv_flags(vma, HPAGE_RESV_OWNER); |
1623 | } | 1741 | } |
1624 | 1742 | ||