author		Andy Whitcroft <apw@shadowen.org>	2007-08-22 17:01:26 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-08-22 22:52:45 -0400
commit		c661b078fd62abe06fd11fab4ac5e4eeafe26b6d (patch)
tree		f21ffeed682900b783563e7d8a8dde906706cc88
parent		e9187bdcbbc06090757d565a3077e1b0ecab23d2 (diff)
synchronous lumpy reclaim: wait for page writeback when directly reclaiming contiguous areas
Lumpy reclaim works by selecting a lead page from the LRU list and then selecting pages for reclaim from the order-aligned area around it. In the situation where all pages in that region are inactive and not referenced by any process, it works well. When there is even light load on the system, however, the pages may not be freed quickly: out of an area of 1024 pages, perhaps only 950 are freed by the time the allocation attempt occurs, because lumpy reclaim returned early.

This patch alters the behaviour of direct reclaim for large contiguous blocks. The first call to shrink_page_list() is asynchronous, but if it fails to reclaim the whole list the pages are submitted a second time and the calling process waits for the IO to complete. This may stall allocators waiting for contiguous memory, but that should be expected behaviour for high-order users. It is preferable to potentially queueing unnecessary areas for IO. Note that kswapd will not stall in this fashion.

[apw@shadowen.org: update to version 2]
[apw@shadowen.org: update to version 3]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/vmscan.c	68
1 file changed, 60 insertions, 8 deletions
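Before the diff itself, here is a minimal userspace sketch of the two-pass idea described in the commit message. The fake_page structure, the shrink_list() helper and the driver in main() are invented for illustration only and are not part of this patch or of the kernel API: the first pass skips pages still under writeback (asynchronous), and if a costly-order direct reclaim came up short, a second pass "waits" for the writeback and frees the remainder (synchronous).

/* Standalone C99 sketch; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

enum pageout_io { PAGEOUT_IO_ASYNC, PAGEOUT_IO_SYNC };

struct fake_page {
	bool under_writeback;	/* stand-in for PageWriteback() */
	bool freed;
};

/* One pass over the "area"; returns how many pages it freed. */
static unsigned long shrink_list(struct fake_page *pages, unsigned long n,
				 enum pageout_io sync_writeback)
{
	unsigned long freed = 0;

	for (unsigned long i = 0; i < n; i++) {
		if (pages[i].freed)
			continue;
		if (pages[i].under_writeback) {
			if (sync_writeback != PAGEOUT_IO_SYNC)
				continue;	/* async pass: keep the page */
			pages[i].under_writeback = false;	/* "wait" for the IO */
		}
		pages[i].freed = true;
		freed++;
	}
	return freed;
}

int main(void)
{
	/* A contiguous area of 8 pages, three of them still being written back. */
	struct fake_page area[8] = {
		[1] = { .under_writeback = true },
		[4] = { .under_writeback = true },
		[6] = { .under_writeback = true },
	};
	unsigned long taken = 8;
	unsigned long freed;

	freed = shrink_list(area, taken, PAGEOUT_IO_ASYNC);
	printf("async pass freed %lu of %lu\n", freed, taken);

	/* Costly-order direct reclaim that came up short: retry synchronously. */
	if (freed < taken)
		freed += shrink_list(area, taken, PAGEOUT_IO_SYNC);
	printf("after sync pass freed %lu of %lu\n", freed, taken);

	return 0;
}

Built with any C99 compiler, the first pass reports 5 of 8 pages freed and the second pass picks up the remaining 3, mirroring the shrink_inactive_list() change at the end of the diff below.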
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99ec7fac33e0..a6e65d024995 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -271,6 +271,12 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
+/* Request for sync pageout. */
+enum pageout_io {
+	PAGEOUT_IO_ASYNC,
+	PAGEOUT_IO_SYNC,
+};
+
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -287,7 +293,8 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping,
+						enum pageout_io sync_writeback)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -346,6 +353,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
+
+		/*
+		 * Wait on writeback if requested to. This happens when
+		 * direct reclaiming a large contiguous area and the
+		 * first attempt to free a range of pages fails.
+		 */
+		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+			wait_on_page_writeback(page);
+
 		if (!PageWriteback(page)) {
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
@@ -423,7 +439,8 @@ cannot_free:
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc)
+					struct scan_control *sc,
+					enum pageout_io sync_writeback)
 {
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
@@ -458,8 +475,23 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
 
-		if (PageWriteback(page))
-			goto keep_locked;
+		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+
+		if (PageWriteback(page)) {
+			/*
+			 * Synchronous reclaim is performed in two passes,
+			 * first an asynchronous pass over the list to
+			 * start parallel writeback, and a second synchronous
+			 * pass to wait for the IO to complete. Wait here
+			 * for any page for which writeback has already
+			 * started.
+			 */
+			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+				wait_on_page_writeback(page);
+			else
+				goto keep_locked;
+		}
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
@@ -478,8 +510,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 #endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
-		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
-			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
 		/*
 		 * The page is mapped into the page tables of one or more
@@ -505,7 +535,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep_locked;
 
 		/* Page is dirty, try to write it out here */
-		switch(pageout(page, mapping)) {
+		switch (pageout(page, mapping, sync_writeback)) {
 		case PAGE_KEEP:
 			goto keep_locked;
 		case PAGE_ACTIVATE:
@@ -786,7 +816,29 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
-		nr_freed = shrink_page_list(&page_list, sc);
+		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+
+		/*
+		 * If we are direct reclaiming for contiguous pages and we do
+		 * not reclaim everything in the list, try again and wait
+		 * for IO to complete. This will stall high-order allocations
+		 * but that should be acceptable to the caller
+		 */
+		if (nr_freed < nr_taken && !current_is_kswapd() &&
+					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+			congestion_wait(WRITE, HZ/10);
+
+			/*
+			 * The attempt at page out may have made some
+			 * of the pages active, mark them inactive again.
+			 */
+			nr_active = clear_active_flags(&page_list);
+			count_vm_events(PGDEACTIVATE, nr_active);
+
+			nr_freed += shrink_page_list(&page_list, sc,
+							PAGEOUT_IO_SYNC);
+		}
+
 		nr_reclaimed += nr_freed;
 		local_irq_disable();
 		if (current_is_kswapd()) {