Diffstat (limited to 'mm/mlock.c')
 mm/mlock.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index d480cd6fc475..192e6eebe4f2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page)
 
 /**
  * munlock_vma_page - munlock a vma page
- * @page - page to be unlocked
+ * @page - page to be unlocked, either a normal page or THP page head
+ *
+ * returns the size of the page as a page mask (0 for normal page,
+ * HPAGE_PMD_NR - 1 for THP head page)
  *
  * called from munlock()/munmap() path with page supposedly on the LRU.
  * When we munlock a page, because the vma where we found the page is being
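For concreteness, the mask convention documented above can be checked with plain arithmetic. A minimal userspace sketch (illustration only, not kernel code), assuming 4 KiB base pages and 2 MiB THPs so that HPAGE_PMD_NR is 512:

#include <stdio.h>

#define HPAGE_PMD_NR 512        /* assumed geometry: 2 MiB THP / 4 KiB pages */

int main(void)
{
        unsigned int thp_mask  = HPAGE_PMD_NR - 1;   /* returned for a THP head */
        unsigned int base_mask = 0;                  /* returned for a normal page */

        /* a caller recovers the page count as 1 + mask */
        printf("THP head:  %u pages\n", 1 + thp_mask);    /* 512 */
        printf("base page: %u pages\n", 1 + base_mask);   /*   1 */
        return 0;
}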
@@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page)
  */
 unsigned int munlock_vma_page(struct page *page)
 {
-        unsigned int page_mask = 0;
+        unsigned int nr_pages;
 
         BUG_ON(!PageLocked(page));
 
         if (TestClearPageMlocked(page)) {
-                unsigned int nr_pages = hpage_nr_pages(page);
+                nr_pages = hpage_nr_pages(page);
                 mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-                page_mask = nr_pages - 1;
                 if (!isolate_lru_page(page))
                         __munlock_isolated_page(page);
                 else
                         __munlock_isolation_failed(page);
+        } else {
+                nr_pages = hpage_nr_pages(page);
         }
 
-        return page_mask;
+        /*
+         * Regardless of the original PageMlocked flag, we determine nr_pages
+         * after touching the flag. This leaves a possible race with a THP page
+         * split, such that a whole THP page was munlocked, but nr_pages == 1.
+         * Returning a smaller mask due to that is OK, the worst that can
+         * happen is subsequent useless scanning of the former tail pages.
+         * The NR_MLOCK accounting can however become broken.
+         */
+        return nr_pages - 1;
 }
 
 /**
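The new else branch matters for callers that use the returned mask to skip THP tail pages: whether or not the page was actually Mlocked, the function now reports the page's real size instead of 0. A toy model of the control flow (illustration only; the helpers model TestClearPageMlocked() and hpage_nr_pages()):

#include <stdio.h>

#define HPAGE_PMD_NR 512                /* assumed THP geometry, as above */

static unsigned int munlock_model(int was_mlocked, int is_thp)
{
        unsigned int nr_pages = is_thp ? HPAGE_PMD_NR : 1;

        if (was_mlocked) {
                /* ... NR_MLOCK -= nr_pages, isolate and munlock the page ... */
        }
        /* both branches now report the real page size */
        return nr_pages - 1;
}

int main(void)
{
        printf("mlocked THP -> mask %u\n", munlock_model(1, 1));  /* 511 */
        printf("clean THP   -> mask %u\n", munlock_model(0, 1));  /* 511 */
        printf("base page   -> mask %u\n", munlock_model(0, 0));  /*   0 */
        return 0;
}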
@@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
         int i;
         int nr = pagevec_count(pvec);
-        int delta_munlocked = -nr;
+        int delta_munlocked;
         struct pagevec pvec_putback;
         int pgrescued = 0;
 
+        pagevec_init(&pvec_putback, 0);
+
         /* Phase 1: page isolation */
         spin_lock_irq(&zone->lru_lock);
         for (i = 0; i < nr; i++) {
@@ -318,18 +332,21 @@ skip_munlock:
                         /*
                          * We won't be munlocking this page in the next phase
                          * but we still need to release the follow_page_mask()
-                         * pin.
+                         * pin. We cannot do it under lru_lock however. If it's
+                         * the last pin, __page_cache_release would deadlock.
                          */
+                        pagevec_add(&pvec_putback, pvec->pages[i]);
                         pvec->pages[i] = NULL;
-                        put_page(page);
-                        delta_munlocked++;
                 }
         }
+        delta_munlocked = -nr + pagevec_count(&pvec_putback);
         __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
         spin_unlock_irq(&zone->lru_lock);
 
+        /* Now we can release pins of pages that we are not munlocking */
+        pagevec_release(&pvec_putback);
+
         /* Phase 2: page munlock */
-        pagevec_init(&pvec_putback, 0);
         for (i = 0; i < nr; i++) {
                 struct page *page = pvec->pages[i];
 
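The pvec_putback changes in the two hunks above follow a classic pattern: dropping the last pin on a page calls __page_cache_release(), which itself takes lru_lock, so calling put_page() while already holding lru_lock can self-deadlock. The fix batches such pages and releases them only after unlocking. A userspace analogue of the trap (all names hypothetical, illustration only):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj { int refcount; };

/* analogue of __page_cache_release(): needs lru_lock itself */
static void release_last_ref(struct obj *o)
{
        pthread_mutex_lock(&lru_lock);
        /* ... remove the object from the LRU list ... */
        pthread_mutex_unlock(&lru_lock);
}

/* analogue of put_page(): may take lru_lock if this was the last pin */
static void put_obj(struct obj *o)
{
        if (--o->refcount == 0)
                release_last_ref(o);
}

int main(void)
{
        struct obj o = { .refcount = 1 };
        struct obj *putback[1];         /* analogue of pvec_putback */
        int n = 0;

        pthread_mutex_lock(&lru_lock);
        /* calling put_obj(&o) here could be the last pin: deadlock; defer */
        putback[n++] = &o;
        pthread_mutex_unlock(&lru_lock);

        while (n)
                put_obj(putback[--n]);  /* safe: lock no longer held */
        puts("released without deadlock");
        return 0;
}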
@@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 
         while (start < end) {
                 struct page *page = NULL;
-                unsigned int page_mask, page_increm;
+                unsigned int page_mask;
+                unsigned long page_increm;
                 struct pagevec pvec;
                 struct zone *zone;
                 int zoneid;
@@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                                 goto next;
                         }
                 }
-                page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+                /* It's a bug to munlock in the middle of a THP page */
+                VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
+                page_increm = 1 + page_mask;
                 start += page_increm * PAGE_SIZE;
 next:
                 cond_resched();
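The new VM_BUG_ON is what makes the simplification valid: when start is aligned to the THP boundary, the old masked expression and the new `1 + page_mask` agree. A worked check (illustration only), assuming 4 KiB pages (PAGE_SHIFT 12) and a 2 MiB THP:

#include <stdio.h>

#define PAGE_SHIFT   12                 /* assumed: 4 KiB base pages */
#define PAGE_SIZE    (1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR 512

int main(void)
{
        unsigned long start = 0x200000;             /* 2 MiB aligned address */
        unsigned int page_mask = HPAGE_PMD_NR - 1;  /* 511, as for a THP head */

        unsigned long old_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
        unsigned long new_increm = 1 + page_mask;

        /* both yield 512 because (start >> PAGE_SHIFT) & page_mask == 0,
         * which is exactly the alignment the new VM_BUG_ON asserts */
        printf("old=%lu new=%lu next start=%#lx\n",
               old_increm, new_increm, start + new_increm * PAGE_SIZE);
        return 0;
}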