-rw-r--r--   include/linux/mm.h |  12
-rw-r--r--   mm/mlock.c         | 110
2 files changed, 85 insertions(+), 37 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dce24569f8fc..03f84b8d7359 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -643,12 +643,12 @@ static inline enum zone_type page_zonenum(const struct page *page)
 #endif
 
 /*
- * The identification function is only used by the buddy allocator for
- * determining if two pages could be buddies. We are not really
- * identifying a zone since we could be using a the section number
- * id if we have not node id available in page flags.
- * We guarantee only that it will return the same value for two
- * combinable pages in a zone.
+ * The identification function is mainly used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really identifying
+ * the zone since we could be using the section number id if we do not have
+ * node id available in page flags.
+ * We only guarantee that it will return the same value for two combinable
+ * pages in a zone.
  */
 static inline int page_zone_id(struct page *page)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index 19a934dce5d6..d63802663242 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -280,8 +280,7 @@ static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
  * The second phase finishes the munlock only for pages where isolation
  * succeeded.
  *
- * Note that pvec is modified during the process. Before returning
- * pagevec_reinit() is called on it.
+ * Note that the pagevec may be modified during the process.
  */
 static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 {
@@ -356,8 +355,60 @@ skip_munlock:
 	 */
 	if (pagevec_count(&pvec_putback))
 		__putback_lru_fast(&pvec_putback, pgrescued);
+}
+
+/*
+ * Fill up pagevec for __munlock_pagevec using pte walk
+ *
+ * The function expects that the struct page corresponding to @start address is
+ * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
+ *
+ * The rest of @pvec is filled by subsequent pages within the same pmd and same
+ * zone, as long as the pte's are present and vm_normal_page() succeeds. These
+ * pages also get pinned.
+ *
+ * Returns the address of the next page that should be scanned. This equals
+ * @start + PAGE_SIZE when no page could be added by the pte walk.
+ */
+static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
+		struct vm_area_struct *vma, int zoneid, unsigned long start,
+		unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	/*
+	 * Initialize pte walk starting at the already pinned page where we
+	 * are sure that there is a pte.
+	 */
+	pte = get_locked_pte(vma->vm_mm, start, &ptl);
+	end = min(end, pmd_addr_end(start, end));
+
+	/* The page next to the pinned page is the first we will try to get */
+	start += PAGE_SIZE;
+	while (start < end) {
+		struct page *page = NULL;
+		pte++;
+		if (pte_present(*pte))
+			page = vm_normal_page(vma, start, *pte);
+		/*
+		 * Break if page could not be obtained or the page's node+zone
+		 * does not match
+		 */
+		if (!page || page_zone_id(page) != zoneid)
+			break;
 
-	pagevec_reinit(pvec);
+		get_page(page);
+		/*
+		 * Increase the address that will be returned *before* the
+		 * eventual break due to pvec becoming full by adding the page
+		 */
+		start += PAGE_SIZE;
+		if (pagevec_add(pvec, page) == 0)
+			break;
+	}
+	pte_unmap_unlock(pte, ptl);
+	return start;
 }
 
 /*
@@ -381,17 +432,16 @@ skip_munlock:
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
-	struct pagevec pvec;
-	struct zone *zone = NULL;
-
-	pagevec_init(&pvec, 0);
 	vma->vm_flags &= ~VM_LOCKED;
 
 	while (start < end) {
-		struct page *page;
+		struct page *page = NULL;
 		unsigned int page_mask, page_increm;
-		struct zone *pagezone;
+		struct pagevec pvec;
+		struct zone *zone;
+		int zoneid;
 
+		pagevec_init(&pvec, 0);
 		/*
 		 * Although FOLL_DUMP is intended for get_dump_page(),
 		 * it just so happens that its special treatment of the
@@ -400,22 +450,10 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * has sneaked into the range, we won't oops here: great).
 		 */
 		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
-					&page_mask);
+				&page_mask);
+
 		if (page && !IS_ERR(page)) {
-			pagezone = page_zone(page);
-			/* The whole pagevec must be in the same zone */
-			if (pagezone != zone) {
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
-				zone = pagezone;
-			}
 			if (PageTransHuge(page)) {
-				/*
-				 * THP pages are not handled by pagevec due
-				 * to their possible split (see below).
-				 */
-				if (pagevec_count(&pvec))
-					__munlock_pagevec(&pvec, zone);
 				lock_page(page);
 				/*
 				 * Any THP page found by follow_page_mask() may
@@ -428,21 +466,31 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				put_page(page); /* follow_page_mask() */
 			} else {
 				/*
-				 * Non-huge pages are handled in batches
-				 * via pagevec. The pin from
-				 * follow_page_mask() prevents them from
-				 * collapsing by THP.
+				 * Non-huge pages are handled in batches via
+				 * pagevec. The pin from follow_page_mask()
+				 * prevents them from collapsing by THP.
+				 */
+				pagevec_add(&pvec, page);
+				zone = page_zone(page);
+				zoneid = page_zone_id(page);
+
+				/*
+				 * Try to fill the rest of pagevec using fast
+				 * pte walk. This will also update start to
+				 * the next page to process. Then munlock the
+				 * pagevec.
 				 */
-				if (pagevec_add(&pvec, page) == 0)
-					__munlock_pagevec(&pvec, zone);
+				start = __munlock_pagevec_fill(&pvec, vma,
+						zoneid, start, end);
+				__munlock_pagevec(&pvec, zone);
+				goto next;
 			}
 		}
 		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
 		start += page_increm * PAGE_SIZE;
+next:
 		cond_resched();
 	}
-	if (pagevec_count(&pvec))
-		__munlock_pagevec(&pvec, zone);
 }
 
 /*
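
For readers following the batching logic introduced above, here is a rough standalone userspace sketch (not kernel code, and not part of this patch) of the same idea: keep queueing consecutive pages into a fixed-size batch while their zone id matches, and advance the returned address *before* the batch-full break so the caller never rescans a page it already queued. The names toy_batch, toy_zone_id and toy_fill are invented for illustration only; they model __munlock_pagevec_fill() and PAGEVEC_SIZE, nothing more.

#include <stdio.h>

#define BATCH_SIZE 14			/* stands in for PAGEVEC_SIZE */
#define TOY_PAGE_SIZE 4096UL

struct toy_batch {
	int nr;
	unsigned long pages[BATCH_SIZE];
};

/* Pretend zone id of the page backing @addr: addresses below 1 MB are zone 0 */
static int toy_zone_id(unsigned long addr)
{
	return addr < (1UL << 20) ? 0 : 1;
}

/*
 * Queue addresses following @start into @b while they share @zoneid and the
 * batch has room.  Mirrors the control flow of __munlock_pagevec_fill(): the
 * return value is advanced past a page *before* the batch-full break, so the
 * caller continues exactly where the walk stopped.
 */
static unsigned long toy_fill(struct toy_batch *b, int zoneid,
			      unsigned long start, unsigned long end)
{
	start += TOY_PAGE_SIZE;		/* the page at @start is already queued */
	while (start < end) {
		if (toy_zone_id(start) != zoneid)
			break;
		b->pages[b->nr++] = start;
		start += TOY_PAGE_SIZE;	/* advance before a possible break */
		if (b->nr == BATCH_SIZE)
			break;
	}
	return start;
}

int main(void)
{
	struct toy_batch b = { .nr = 1 };	/* slot 0: the already pinned page */
	unsigned long next;

	b.pages[0] = (1UL << 20) - 4 * TOY_PAGE_SIZE;
	next = toy_fill(&b, 0, b.pages[0], 1UL << 21);
	printf("queued %d pages, continue scanning at %#lx\n", b.nr, next);
	return 0;
}

Running this queues the four pages below the 1 MB "zone boundary" and reports 0x100000 as the next address to scan, which is the same hand-off the patch performs when the pte walk hits a page in a different zone.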