diff options
author | Lee Schermerhorn <lee.schermerhorn@hp.com> | 2008-10-18 23:26:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-20 11:52:31 -0400 |
commit | af936a1606246a10c145feac3770f6287f483f02 (patch) | |
tree | 8b1ca7fabb5c749ffdecd654519889c6c2ed2fb6 | |
parent | 64d6519dda3905dfb94d3f93c07c5f263f41813f (diff) |
vmscan: unevictable LRU scan sysctl
This patch adds a function to scan individual or all zones' unevictable
lists and move any pages that have become evictable onto the respective
zone's inactive list, where shrink_inactive_list() will deal with them.
Adds a sysctl to scan all nodes, and per-node attributes to scan an
individual node's zones.
Kosaki: If evictable pages are found on the unevictable LRU when
/proc/sys/vm/scan_unevictable_pages is written, print the filename and
file offset of each such page.
[akpm@linux-foundation.org: fix one CONFIG_MMU=n build error]
[kosaki.motohiro@jp.fujitsu.com: adapt vmscan-unevictable-lru-scan-sysctl.patch to new sysfs API]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/base/node.c | 5 | ||||
-rw-r--r-- | include/linux/rmap.h | 3 | ||||
-rw-r--r-- | include/linux/swap.h | 15 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 | ||||
-rw-r--r-- | mm/rmap.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 166 |
6 files changed, 201 insertions, 2 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c index fb45d88a2446..f5207090885a 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <linux/device.h> | 15 | #include <linux/device.h> |
16 | #include <linux/swap.h> | ||
16 | 17 | ||
17 | static struct sysdev_class node_class = { | 18 | static struct sysdev_class node_class = { |
18 | .name = "node", | 19 | .name = "node", |
@@ -191,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent) | |||
191 | sysdev_create_file(&node->sysdev, &attr_meminfo); | 192 | sysdev_create_file(&node->sysdev, &attr_meminfo); |
192 | sysdev_create_file(&node->sysdev, &attr_numastat); | 193 | sysdev_create_file(&node->sysdev, &attr_numastat); |
193 | sysdev_create_file(&node->sysdev, &attr_distance); | 194 | sysdev_create_file(&node->sysdev, &attr_distance); |
195 | |||
196 | scan_unevictable_register_node(node); | ||
194 | } | 197 | } |
195 | return error; | 198 | return error; |
196 | } | 199 | } |
@@ -210,6 +213,8 @@ void unregister_node(struct node *node) | |||
210 | sysdev_remove_file(&node->sysdev, &attr_numastat); | 213 | sysdev_remove_file(&node->sysdev, &attr_numastat); |
211 | sysdev_remove_file(&node->sysdev, &attr_distance); | 214 | sysdev_remove_file(&node->sysdev, &attr_distance); |
212 | 215 | ||
216 | scan_unevictable_unregister_node(node); | ||
217 | |||
213 | sysdev_unregister(&node->sysdev); | 218 | sysdev_unregister(&node->sysdev); |
214 | } | 219 | } |
215 | 220 | ||
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 955667e6a52d..1da48db8db09 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h | |||
@@ -75,6 +75,9 @@ void anon_vma_unlink(struct vm_area_struct *); | |||
75 | void anon_vma_link(struct vm_area_struct *); | 75 | void anon_vma_link(struct vm_area_struct *); |
76 | void __anon_vma_link(struct vm_area_struct *); | 76 | void __anon_vma_link(struct vm_area_struct *); |
77 | 77 | ||
78 | extern struct anon_vma *page_lock_anon_vma(struct page *page); | ||
79 | extern void page_unlock_anon_vma(struct anon_vma *anon_vma); | ||
80 | |||
78 | /* | 81 | /* |
79 | * rmap interfaces called when adding or removing pte of page | 82 | * rmap interfaces called when adding or removing pte of page |
80 | */ | 83 | */ |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 07eda69412fb..a3af95b2cb6d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/list.h> | 7 | #include <linux/list.h> |
8 | #include <linux/memcontrol.h> | 8 | #include <linux/memcontrol.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/node.h> | ||
10 | 11 | ||
11 | #include <asm/atomic.h> | 12 | #include <asm/atomic.h> |
12 | #include <asm/page.h> | 13 | #include <asm/page.h> |
@@ -235,15 +236,29 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) | |||
235 | #ifdef CONFIG_UNEVICTABLE_LRU | 236 | #ifdef CONFIG_UNEVICTABLE_LRU |
236 | extern int page_evictable(struct page *page, struct vm_area_struct *vma); | 237 | extern int page_evictable(struct page *page, struct vm_area_struct *vma); |
237 | extern void scan_mapping_unevictable_pages(struct address_space *); | 238 | extern void scan_mapping_unevictable_pages(struct address_space *); |
239 | |||
240 | extern unsigned long scan_unevictable_pages; | ||
241 | extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, | ||
242 | void __user *, size_t *, loff_t *); | ||
243 | extern int scan_unevictable_register_node(struct node *node); | ||
244 | extern void scan_unevictable_unregister_node(struct node *node); | ||
238 | #else | 245 | #else |
239 | static inline int page_evictable(struct page *page, | 246 | static inline int page_evictable(struct page *page, |
240 | struct vm_area_struct *vma) | 247 | struct vm_area_struct *vma) |
241 | { | 248 | { |
242 | return 1; | 249 | return 1; |
243 | } | 250 | } |
251 | |||
244 | static inline void scan_mapping_unevictable_pages(struct address_space *mapping) | 252 | static inline void scan_mapping_unevictable_pages(struct address_space *mapping) |
245 | { | 253 | { |
246 | } | 254 | } |
255 | |||
256 | static inline int scan_unevictable_register_node(struct node *node) | ||
257 | { | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static inline void scan_unevictable_unregister_node(struct node *node) { } | ||
247 | #endif | 262 | #endif |
248 | 263 | ||
249 | extern int kswapd_run(int nid); | 264 | extern int kswapd_run(int nid); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e4d6a0..b3cc73931d1f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = { | |||
833 | .proc_handler = &proc_dointvec, | 833 | .proc_handler = &proc_dointvec, |
834 | }, | 834 | }, |
835 | #endif | 835 | #endif |
836 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
837 | { | ||
838 | .ctl_name = CTL_UNNUMBERED, | ||
839 | .procname = "scan_unevictable_pages", | ||
840 | .data = &scan_unevictable_pages, | ||
841 | .maxlen = sizeof(scan_unevictable_pages), | ||
842 | .mode = 0644, | ||
843 | .proc_handler = &scan_unevictable_handler, | ||
844 | }, | ||
845 | #endif | ||
836 | /* | 846 | /* |
837 | * NOTE: do not add new entries to this table unless you have read | 847 | * NOTE: do not add new entries to this table unless you have read |
838 | * Documentation/sysctl/ctl_unnumbered.txt | 848 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -181,7 +181,7 @@ void __init anon_vma_init(void) | |||
181 | * Getting a lock on a stable anon_vma from a page off the LRU is | 181 | * Getting a lock on a stable anon_vma from a page off the LRU is |
182 | * tricky: page_lock_anon_vma rely on RCU to guard against the races. | 182 | * tricky: page_lock_anon_vma rely on RCU to guard against the races. |
183 | */ | 183 | */ |
184 | static struct anon_vma *page_lock_anon_vma(struct page *page) | 184 | struct anon_vma *page_lock_anon_vma(struct page *page) |
185 | { | 185 | { |
186 | struct anon_vma *anon_vma; | 186 | struct anon_vma *anon_vma; |
187 | unsigned long anon_mapping; | 187 | unsigned long anon_mapping; |
@@ -201,7 +201,7 @@ out: | |||
201 | return NULL; | 201 | return NULL; |
202 | } | 202 | } |
203 | 203 | ||
204 | static void page_unlock_anon_vma(struct anon_vma *anon_vma) | 204 | void page_unlock_anon_vma(struct anon_vma *anon_vma) |
205 | { | 205 | { |
206 | spin_unlock(&anon_vma->lock); | 206 | spin_unlock(&anon_vma->lock); |
207 | rcu_read_unlock(); | 207 | rcu_read_unlock(); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index e5aaaad159ef..ca64e3e0c518 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/freezer.h> | 39 | #include <linux/freezer.h> |
40 | #include <linux/memcontrol.h> | 40 | #include <linux/memcontrol.h> |
41 | #include <linux/delayacct.h> | 41 | #include <linux/delayacct.h> |
42 | #include <linux/sysctl.h> | ||
42 | 43 | ||
43 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
44 | #include <asm/div64.h> | 45 | #include <asm/div64.h> |
@@ -2363,6 +2364,39 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
2363 | return 1; | 2364 | return 1; |
2364 | } | 2365 | } |
2365 | 2366 | ||
2367 | static void show_page_path(struct page *page) | ||
2368 | { | ||
2369 | char buf[256]; | ||
2370 | if (page_is_file_cache(page)) { | ||
2371 | struct address_space *mapping = page->mapping; | ||
2372 | struct dentry *dentry; | ||
2373 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
2374 | |||
2375 | spin_lock(&mapping->i_mmap_lock); | ||
2376 | dentry = d_find_alias(mapping->host); | ||
2377 | printk(KERN_INFO "rescued: %s %lu\n", | ||
2378 | dentry_path(dentry, buf, 256), pgoff); | ||
2379 | spin_unlock(&mapping->i_mmap_lock); | ||
2380 | } else { | ||
2381 | #if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) | ||
2382 | struct anon_vma *anon_vma; | ||
2383 | struct vm_area_struct *vma; | ||
2384 | |||
2385 | anon_vma = page_lock_anon_vma(page); | ||
2386 | if (!anon_vma) | ||
2387 | return; | ||
2388 | |||
2389 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | ||
2390 | printk(KERN_INFO "rescued: anon %s\n", | ||
2391 | vma->vm_mm->owner->comm); | ||
2392 | break; | ||
2393 | } | ||
2394 | page_unlock_anon_vma(anon_vma); | ||
2395 | #endif | ||
2396 | } | ||
2397 | } | ||
2398 | |||
2399 | |||
2366 | /** | 2400 | /** |
2367 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list | 2401 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list |
2368 | * @page: page to check evictability and move to appropriate lru list | 2402 | * @page: page to check evictability and move to appropriate lru list |
@@ -2382,6 +2416,9 @@ retry: | |||
2382 | ClearPageUnevictable(page); | 2416 | ClearPageUnevictable(page); |
2383 | if (page_evictable(page, NULL)) { | 2417 | if (page_evictable(page, NULL)) { |
2384 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); | 2418 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); |
2419 | |||
2420 | show_page_path(page); | ||
2421 | |||
2385 | __dec_zone_state(zone, NR_UNEVICTABLE); | 2422 | __dec_zone_state(zone, NR_UNEVICTABLE); |
2386 | list_move(&page->lru, &zone->lru[l].list); | 2423 | list_move(&page->lru, &zone->lru[l].list); |
2387 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); | 2424 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); |
@@ -2451,4 +2488,133 @@ void scan_mapping_unevictable_pages(struct address_space *mapping) | |||
2451 | } | 2488 | } |
2452 | 2489 | ||
2453 | } | 2490 | } |
2491 | |||
2492 | /** | ||
2493 | * scan_zone_unevictable_pages - check unevictable list for evictable pages | ||
2494 | * @zone - zone of which to scan the unevictable list | ||
2495 | * | ||
2496 | * Scan @zone's unevictable LRU lists to check for pages that have become | ||
2497 | * evictable. Move those that have to @zone's inactive list where they | ||
2498 | * become candidates for reclaim, unless shrink_inactive_zone() decides | ||
2499 | * to reactivate them. Pages that are still unevictable are rotated | ||
2500 | * back onto @zone's unevictable list. | ||
2501 | */ | ||
2502 | #define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ | ||
2503 | void scan_zone_unevictable_pages(struct zone *zone) | ||
2504 | { | ||
2505 | struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; | ||
2506 | unsigned long scan; | ||
2507 | unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE); | ||
2508 | |||
2509 | while (nr_to_scan > 0) { | ||
2510 | unsigned long batch_size = min(nr_to_scan, | ||
2511 | SCAN_UNEVICTABLE_BATCH_SIZE); | ||
2512 | |||
2513 | spin_lock_irq(&zone->lru_lock); | ||
2514 | for (scan = 0; scan < batch_size; scan++) { | ||
2515 | struct page *page = lru_to_page(l_unevictable); | ||
2516 | |||
2517 | if (!trylock_page(page)) | ||
2518 | continue; | ||
2519 | |||
2520 | prefetchw_prev_lru_page(page, l_unevictable, flags); | ||
2521 | |||
2522 | if (likely(PageLRU(page) && PageUnevictable(page))) | ||
2523 | check_move_unevictable_page(page, zone); | ||
2524 | |||
2525 | unlock_page(page); | ||
2526 | } | ||
2527 | spin_unlock_irq(&zone->lru_lock); | ||
2528 | |||
2529 | nr_to_scan -= batch_size; | ||
2530 | } | ||
2531 | } | ||
2532 | |||
2533 | |||
2534 | /** | ||
2535 | * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages | ||
2536 | * | ||
2537 | * A really big hammer: scan all zones' unevictable LRU lists to check for | ||
2538 | * pages that have become evictable. Move those back to the zones' | ||
2539 | * inactive list where they become candidates for reclaim. | ||
2540 | * This occurs when, e.g., we have unswappable pages on the unevictable lists, | ||
2541 | * and we add swap to the system. As such, it runs in the context of a task | ||
2542 | * that has possibly/probably made some previously unevictable pages | ||
2543 | * evictable. | ||
2544 | */ | ||
2545 | void scan_all_zones_unevictable_pages(void) | ||
2546 | { | ||
2547 | struct zone *zone; | ||
2548 | |||
2549 | for_each_zone(zone) { | ||
2550 | scan_zone_unevictable_pages(zone); | ||
2551 | } | ||
2552 | } | ||
2553 | |||
2554 | /* | ||
2555 | * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of | ||
2556 | * all nodes' unevictable lists for evictable pages | ||
2557 | */ | ||
2558 | unsigned long scan_unevictable_pages; | ||
2559 | |||
2560 | int scan_unevictable_handler(struct ctl_table *table, int write, | ||
2561 | struct file *file, void __user *buffer, | ||
2562 | size_t *length, loff_t *ppos) | ||
2563 | { | ||
2564 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | ||
2565 | |||
2566 | if (write && *(unsigned long *)table->data) | ||
2567 | scan_all_zones_unevictable_pages(); | ||
2568 | |||
2569 | scan_unevictable_pages = 0; | ||
2570 | return 0; | ||
2571 | } | ||
2572 | |||
2573 | /* | ||
2574 | * per node 'scan_unevictable_pages' attribute. On demand re-scan of | ||
2575 | * a specified node's per zone unevictable lists for evictable pages. | ||
2576 | */ | ||
2577 | |||
/*
 * Show method of the per node 'scan_unevictable_pages' attribute.
 * Neither @dev nor @attr is consulted: the attribute always reads "0\n".
 * Returns the number of characters placed in @buf.
 */
static ssize_t read_scan_unevictable_node(struct sys_device *dev,
					  struct sysdev_attribute *attr,
					  char *buf)
{
	ssize_t len;

	/* always zero; should fit... */
	len = sprintf(buf, "0\n");

	return len;
}
2584 | |||
2585 | static ssize_t write_scan_unevictable_node(struct sys_device *dev, | ||
2586 | struct sysdev_attribute *attr, | ||
2587 | const char *buf, size_t count) | ||
2588 | { | ||
2589 | struct zone *node_zones = NODE_DATA(dev->id)->node_zones; | ||
2590 | struct zone *zone; | ||
2591 | unsigned long res; | ||
2592 | unsigned long req = strict_strtoul(buf, 10, &res); | ||
2593 | |||
2594 | if (!req) | ||
2595 | return 1; /* zero is no-op */ | ||
2596 | |||
2597 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | ||
2598 | if (!populated_zone(zone)) | ||
2599 | continue; | ||
2600 | scan_zone_unevictable_pages(zone); | ||
2601 | } | ||
2602 | return 1; | ||
2603 | } | ||
2604 | |||
2605 | |||
2606 | static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR, | ||
2607 | read_scan_unevictable_node, | ||
2608 | write_scan_unevictable_node); | ||
2609 | |||
2610 | int scan_unevictable_register_node(struct node *node) | ||
2611 | { | ||
2612 | return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages); | ||
2613 | } | ||
2614 | |||
2615 | void scan_unevictable_unregister_node(struct node *node) | ||
2616 | { | ||
2617 | sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages); | ||
2618 | } | ||
2619 | |||
2454 | #endif | 2620 | #endif |