aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLee Schermerhorn <lee.schermerhorn@hp.com>2008-10-18 23:26:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 11:52:31 -0400
commitaf936a1606246a10c145feac3770f6287f483f02 (patch)
tree8b1ca7fabb5c749ffdecd654519889c6c2ed2fb6
parent64d6519dda3905dfb94d3f93c07c5f263f41813f (diff)
vmscan: unevictable LRU scan sysctl
This patch adds a function to scan individual or all zones' unevictable lists and move any pages that have become evictable onto the respective zone's inactive list, where shrink_inactive_list() will deal with them. It adds a sysctl to scan all nodes, and per-node attributes to scan individual nodes' zones. Kosaki: If an evictable page is found on the unevictable LRU when writing to /proc/sys/vm/scan_unevictable_pages, print the filename and file offset of that page. [akpm@linux-foundation.org: fix one CONFIG_MMU=n build error] [kosaki.motohiro@jp.fujitsu.com: adapt vmscan-unevictable-lru-scan-sysctl.patch to new sysfs API] Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/base/node.c5
-rw-r--r--include/linux/rmap.h3
-rw-r--r--include/linux/swap.h15
-rw-r--r--kernel/sysctl.c10
-rw-r--r--mm/rmap.c4
-rw-r--r--mm/vmscan.c166
6 files changed, 201 insertions, 2 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c
index fb45d88a2446..f5207090885a 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -13,6 +13,7 @@
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/device.h> 15#include <linux/device.h>
16#include <linux/swap.h>
16 17
17static struct sysdev_class node_class = { 18static struct sysdev_class node_class = {
18 .name = "node", 19 .name = "node",
@@ -191,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent)
191 sysdev_create_file(&node->sysdev, &attr_meminfo); 192 sysdev_create_file(&node->sysdev, &attr_meminfo);
192 sysdev_create_file(&node->sysdev, &attr_numastat); 193 sysdev_create_file(&node->sysdev, &attr_numastat);
193 sysdev_create_file(&node->sysdev, &attr_distance); 194 sysdev_create_file(&node->sysdev, &attr_distance);
195
196 scan_unevictable_register_node(node);
194 } 197 }
195 return error; 198 return error;
196} 199}
@@ -210,6 +213,8 @@ void unregister_node(struct node *node)
210 sysdev_remove_file(&node->sysdev, &attr_numastat); 213 sysdev_remove_file(&node->sysdev, &attr_numastat);
211 sysdev_remove_file(&node->sysdev, &attr_distance); 214 sysdev_remove_file(&node->sysdev, &attr_distance);
212 215
216 scan_unevictable_unregister_node(node);
217
213 sysdev_unregister(&node->sysdev); 218 sysdev_unregister(&node->sysdev);
214} 219}
215 220
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 955667e6a52d..1da48db8db09 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -75,6 +75,9 @@ void anon_vma_unlink(struct vm_area_struct *);
75void anon_vma_link(struct vm_area_struct *); 75void anon_vma_link(struct vm_area_struct *);
76void __anon_vma_link(struct vm_area_struct *); 76void __anon_vma_link(struct vm_area_struct *);
77 77
78extern struct anon_vma *page_lock_anon_vma(struct page *page);
79extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
80
78/* 81/*
79 * rmap interfaces called when adding or removing pte of page 82 * rmap interfaces called when adding or removing pte of page
80 */ 83 */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 07eda69412fb..a3af95b2cb6d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -7,6 +7,7 @@
7#include <linux/list.h> 7#include <linux/list.h>
8#include <linux/memcontrol.h> 8#include <linux/memcontrol.h>
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/node.h>
10 11
11#include <asm/atomic.h> 12#include <asm/atomic.h>
12#include <asm/page.h> 13#include <asm/page.h>
@@ -235,15 +236,29 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
235#ifdef CONFIG_UNEVICTABLE_LRU 236#ifdef CONFIG_UNEVICTABLE_LRU
236extern int page_evictable(struct page *page, struct vm_area_struct *vma); 237extern int page_evictable(struct page *page, struct vm_area_struct *vma);
237extern void scan_mapping_unevictable_pages(struct address_space *); 238extern void scan_mapping_unevictable_pages(struct address_space *);
239
240extern unsigned long scan_unevictable_pages;
241extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
242 void __user *, size_t *, loff_t *);
243extern int scan_unevictable_register_node(struct node *node);
244extern void scan_unevictable_unregister_node(struct node *node);
238#else 245#else
239static inline int page_evictable(struct page *page, 246static inline int page_evictable(struct page *page,
240 struct vm_area_struct *vma) 247 struct vm_area_struct *vma)
241{ 248{
242 return 1; 249 return 1;
243} 250}
251
244static inline void scan_mapping_unevictable_pages(struct address_space *mapping) 252static inline void scan_mapping_unevictable_pages(struct address_space *mapping)
245{ 253{
246} 254}
255
/*
 * Stub used when CONFIG_UNEVICTABLE_LRU is not set: there is no per-node
 * scan attribute to create, so registration trivially succeeds.
 */
static inline int scan_unevictable_register_node(struct node *node)
{
	return 0;
}

/*
 * Stub used when CONFIG_UNEVICTABLE_LRU is not set: nothing to remove.
 */
static inline void scan_unevictable_unregister_node(struct node *node)
{
}
247#endif 262#endif
248 263
249extern int kswapd_run(int nid); 264extern int kswapd_run(int nid);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..b3cc73931d1f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = {
833 .proc_handler = &proc_dointvec, 833 .proc_handler = &proc_dointvec,
834 }, 834 },
835#endif 835#endif
836#ifdef CONFIG_UNEVICTABLE_LRU
837 {
838 .ctl_name = CTL_UNNUMBERED,
839 .procname = "scan_unevictable_pages",
840 .data = &scan_unevictable_pages,
841 .maxlen = sizeof(scan_unevictable_pages),
842 .mode = 0644,
843 .proc_handler = &scan_unevictable_handler,
844 },
845#endif
836/* 846/*
837 * NOTE: do not add new entries to this table unless you have read 847 * NOTE: do not add new entries to this table unless you have read
838 * Documentation/sysctl/ctl_unnumbered.txt 848 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/mm/rmap.c b/mm/rmap.c
index 7e60df99018e..7e90bebbeb6c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -181,7 +181,7 @@ void __init anon_vma_init(void)
181 * Getting a lock on a stable anon_vma from a page off the LRU is 181 * Getting a lock on a stable anon_vma from a page off the LRU is
182 * tricky: page_lock_anon_vma rely on RCU to guard against the races. 182 * tricky: page_lock_anon_vma rely on RCU to guard against the races.
183 */ 183 */
184static struct anon_vma *page_lock_anon_vma(struct page *page) 184struct anon_vma *page_lock_anon_vma(struct page *page)
185{ 185{
186 struct anon_vma *anon_vma; 186 struct anon_vma *anon_vma;
187 unsigned long anon_mapping; 187 unsigned long anon_mapping;
@@ -201,7 +201,7 @@ out:
201 return NULL; 201 return NULL;
202} 202}
203 203
204static void page_unlock_anon_vma(struct anon_vma *anon_vma) 204void page_unlock_anon_vma(struct anon_vma *anon_vma)
205{ 205{
206 spin_unlock(&anon_vma->lock); 206 spin_unlock(&anon_vma->lock);
207 rcu_read_unlock(); 207 rcu_read_unlock();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5aaaad159ef..ca64e3e0c518 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
39#include <linux/freezer.h> 39#include <linux/freezer.h>
40#include <linux/memcontrol.h> 40#include <linux/memcontrol.h>
41#include <linux/delayacct.h> 41#include <linux/delayacct.h>
#include <linux/sysctl.h>
#include <linux/err.h>
42 43
43#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
44#include <asm/div64.h> 45#include <asm/div64.h>
@@ -2363,6 +2364,39 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
2363 return 1; 2364 return 1;
2364} 2365}
2365 2366
2367static void show_page_path(struct page *page)
2368{
2369 char buf[256];
2370 if (page_is_file_cache(page)) {
2371 struct address_space *mapping = page->mapping;
2372 struct dentry *dentry;
2373 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2374
2375 spin_lock(&mapping->i_mmap_lock);
2376 dentry = d_find_alias(mapping->host);
2377 printk(KERN_INFO "rescued: %s %lu\n",
2378 dentry_path(dentry, buf, 256), pgoff);
2379 spin_unlock(&mapping->i_mmap_lock);
2380 } else {
2381#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
2382 struct anon_vma *anon_vma;
2383 struct vm_area_struct *vma;
2384
2385 anon_vma = page_lock_anon_vma(page);
2386 if (!anon_vma)
2387 return;
2388
2389 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
2390 printk(KERN_INFO "rescued: anon %s\n",
2391 vma->vm_mm->owner->comm);
2392 break;
2393 }
2394 page_unlock_anon_vma(anon_vma);
2395#endif
2396 }
2397}
2398
2399
2366/** 2400/**
2367 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list 2401 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
2368 * @page: page to check evictability and move to appropriate lru list 2402 * @page: page to check evictability and move to appropriate lru list
@@ -2382,6 +2416,9 @@ retry:
2382 ClearPageUnevictable(page); 2416 ClearPageUnevictable(page);
2383 if (page_evictable(page, NULL)) { 2417 if (page_evictable(page, NULL)) {
2384 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); 2418 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
2419
2420 show_page_path(page);
2421
2385 __dec_zone_state(zone, NR_UNEVICTABLE); 2422 __dec_zone_state(zone, NR_UNEVICTABLE);
2386 list_move(&page->lru, &zone->lru[l].list); 2423 list_move(&page->lru, &zone->lru[l].list);
2387 __inc_zone_state(zone, NR_INACTIVE_ANON + l); 2424 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
@@ -2451,4 +2488,133 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
2451 } 2488 }
2452 2489
2453} 2490}
2491
2492/**
2493 * scan_zone_unevictable_pages - check unevictable list for evictable pages
2494 * @zone - zone of which to scan the unevictable list
2495 *
2496 * Scan @zone's unevictable LRU lists to check for pages that have become
2497 * evictable. Move those that have to @zone's inactive list where they
2498 * become candidates for reclaim, unless shrink_inactive_zone() decides
2499 * to reactivate them. Pages that are still unevictable are rotated
2500 * back onto @zone's unevictable list.
2501 */
2502#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
2503void scan_zone_unevictable_pages(struct zone *zone)
2504{
2505 struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
2506 unsigned long scan;
2507 unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE);
2508
2509 while (nr_to_scan > 0) {
2510 unsigned long batch_size = min(nr_to_scan,
2511 SCAN_UNEVICTABLE_BATCH_SIZE);
2512
2513 spin_lock_irq(&zone->lru_lock);
2514 for (scan = 0; scan < batch_size; scan++) {
2515 struct page *page = lru_to_page(l_unevictable);
2516
2517 if (!trylock_page(page))
2518 continue;
2519
2520 prefetchw_prev_lru_page(page, l_unevictable, flags);
2521
2522 if (likely(PageLRU(page) && PageUnevictable(page)))
2523 check_move_unevictable_page(page, zone);
2524
2525 unlock_page(page);
2526 }
2527 spin_unlock_irq(&zone->lru_lock);
2528
2529 nr_to_scan -= batch_size;
2530 }
2531}
2532
2533
2534/**
2535 * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages
2536 *
2537 * A really big hammer: scan all zones' unevictable LRU lists to check for
2538 * pages that have become evictable. Move those back to the zones'
2539 * inactive list where they become candidates for reclaim.
2540 * This occurs when, e.g., we have unswappable pages on the unevictable lists,
2541 * and we add swap to the system. As such, it runs in the context of a task
2542 * that has possibly/probably made some previously unevictable pages
2543 * evictable.
2544 */
2545void scan_all_zones_unevictable_pages(void)
2546{
2547 struct zone *zone;
2548
2549 for_each_zone(zone) {
2550 scan_zone_unevictable_pages(zone);
2551 }
2552}
2553
2554/*
2555 * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of
2556 * all nodes' unevictable lists for evictable pages
2557 */
2558unsigned long scan_unevictable_pages;
2559
2560int scan_unevictable_handler(struct ctl_table *table, int write,
2561 struct file *file, void __user *buffer,
2562 size_t *length, loff_t *ppos)
2563{
2564 proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
2565
2566 if (write && *(unsigned long *)table->data)
2567 scan_all_zones_unevictable_pages();
2568
2569 scan_unevictable_pages = 0;
2570 return 0;
2571}
2572
/*
 * Per-node 'scan_unevictable_pages' attribute: an on-demand trigger for
 * rescanning the unevictable lists of one node's zones for pages that
 * have become evictable.
 */

/*
 * Reading the attribute always yields "0\n"; the return value is the
 * number of characters placed in @buf.  @dev and @attr are unused.
 */
static ssize_t read_scan_unevictable_node(struct sys_device *dev,
					  struct sysdev_attribute *attr,
					  char *buf)
{
	int len;

	len = sprintf(buf, "0\n");	/* always zero; should fit... */
	return len;
}
2584
2585static ssize_t write_scan_unevictable_node(struct sys_device *dev,
2586 struct sysdev_attribute *attr,
2587 const char *buf, size_t count)
2588{
2589 struct zone *node_zones = NODE_DATA(dev->id)->node_zones;
2590 struct zone *zone;
2591 unsigned long res;
2592 unsigned long req = strict_strtoul(buf, 10, &res);
2593
2594 if (!req)
2595 return 1; /* zero is no-op */
2596
2597 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
2598 if (!populated_zone(zone))
2599 continue;
2600 scan_zone_unevictable_pages(zone);
2601 }
2602 return 1;
2603}
2604
2605
2606static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
2607 read_scan_unevictable_node,
2608 write_scan_unevictable_node);
2609
2610int scan_unevictable_register_node(struct node *node)
2611{
2612 return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
2613}
2614
2615void scan_unevictable_unregister_node(struct node *node)
2616{
2617 sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
2618}
2619
2454#endif 2620#endif