-rw-r--r--	arch/i386/kernel/syscall_table.S	2
-rw-r--r--	arch/ia64/kernel/entry.S	2
-rw-r--r--	include/asm-i386/unistd.h	2
-rw-r--r--	include/asm-ia64/unistd.h	1
-rw-r--r--	include/linux/mmzone.h	6
-rw-r--r--	include/linux/swap.h	1
-rw-r--r--	kernel/sys_ni.c	1
-rw-r--r--	mm/page_alloc.c	33
-rw-r--r--	mm/vmscan.c	64
9 files changed, 104 insertions, 8 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 6cd1ed311f02..d408afaf6495 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
 	.long sys_io_submit
 	.long sys_io_cancel
 	.long sys_fadvise64	/* 250 */
-	.long sys_ni_syscall
+	.long sys_set_zone_reclaim
 	.long sys_exit_group
 	.long sys_lookup_dcookie
 	.long sys_epoll_create
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index d99316c9be28..b1d5d3d5276c 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1579,7 +1579,7 @@ sys_call_table:
 	data8 sys_keyctl
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall			// 1275
-	data8 sys_ni_syscall
+	data8 sys_set_zone_reclaim
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 61bcc1b1e3f4..176413fb9ae3 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -256,7 +256,7 @@
 #define __NR_io_submit		248
 #define __NR_io_cancel		249
 #define __NR_fadvise64		250
-
+#define __NR_set_zone_reclaim	251
 #define __NR_exit_group		252
 #define __NR_lookup_dcookie	253
 #define __NR_epoll_create	254
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 33e26c557c5c..f7f43ec2483a 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -263,6 +263,7 @@
 #define __NR_add_key			1271
 #define __NR_request_key		1272
 #define __NR_keyctl			1273
+#define __NR_set_zone_reclaim		1276
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index beacd931b606..dfc2452ccb10 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -145,6 +145,12 @@ struct zone {
 	int all_unreclaimable; /* All pages pinned */
 
 	/*
+	 * Does the allocator try to reclaim pages from the zone as soon
+	 * as it fails a watermark_ok() in __alloc_pages?
+	 */
+	int reclaim_pages;
+
+	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
 	 * defined as the scanning priority at which we achieved our reclaim
 	 * target at the previous try_to_free_pages() or balance_pgdat()
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3bbc41be9bd0..0d21e682d99d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -173,6 +173,7 @@ extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
+extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0dda70ed1f98..6f15bea7d1a8 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -77,6 +77,7 @@ cond_syscall(sys_request_key);
 cond_syscall(sys_keyctl);
 cond_syscall(compat_sys_keyctl);
 cond_syscall(compat_sys_socketcall);
+cond_syscall(sys_set_zone_reclaim);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40169f0b7e9e..3c0f69ded6b5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -724,6 +724,14 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return 1;
 }
 
+static inline int
+should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
+{
+	if (!z->reclaim_pages)
+		return 0;
+	return 1;
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -760,17 +768,32 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
 
 	classzone_idx = zone_idx(zones[0]);
 
 restart:
 	/* Go through the zonelist once, looking for a zone with enough free */
 	for (i = 0; (z = zones[i]) != NULL; i++) {
-
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				       classzone_idx, 0, 0))
-			continue;
+		int do_reclaim = should_reclaim_zone(z, gfp_mask);
 
 		if (!cpuset_zone_allowed(z))
 			continue;
 
+		/*
+		 * If the zone is to attempt early page reclaim then this loop
+		 * will try to reclaim pages and check the watermark a second
+		 * time before giving up and falling back to the next zone.
+		 */
+zone_reclaim_retry:
+		if (!zone_watermark_ok(z, order, z->pages_low,
+				       classzone_idx, 0, 0)) {
+			if (!do_reclaim)
+				continue;
+			else {
+				zone_reclaim(z, gfp_mask, order);
+				/* Only try reclaim once */
+				do_reclaim = 0;
+				goto zone_reclaim_retry;
+			}
+		}
+
 		page = buffered_rmqueue(z, order, gfp_mask);
 		if (page)
 			goto got_pg;
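
Note: the comment in the hunk above summarizes the new control flow — a zone whose reclaim_pages flag is set gets exactly one zone_reclaim() attempt, after which the watermark is rechecked before the allocator falls back to the next zone. The stand-alone sketch below mimics that flow in user space; zone_watermark_ok_stub() and zone_reclaim_stub() are hypothetical stubs for illustration only, not kernel interfaces.

/*
 * Stand-alone illustration of the retry flow added to __alloc_pages()
 * above.  The helpers are hypothetical stubs, not kernel code.
 * Build: cc -o reclaim_flow reclaim_flow.c
 */
#include <stdio.h>

static int watermark_ok;	/* pretend the zone starts below its watermark */

static int zone_watermark_ok_stub(void)
{
	return watermark_ok;
}

static void zone_reclaim_stub(void)
{
	/* pretend reclaim freed enough pages to pass the watermark */
	watermark_ok = 1;
	printf("zone_reclaim() ran\n");
}

int main(void)
{
	int do_reclaim = 1;	/* stands in for zone->reclaim_pages being set */

zone_reclaim_retry:
	if (!zone_watermark_ok_stub()) {
		if (!do_reclaim) {
			printf("watermark still failing, fall back to next zone\n");
			return 1;
		}
		zone_reclaim_stub();
		do_reclaim = 0;		/* only one reclaim attempt per zone */
		goto zone_reclaim_retry;
	}
	printf("watermark ok, allocate from this zone\n");
	return 0;
}
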
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6379ddbffd9b..7da846960d8a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1323,3 +1323,67 @@ static int __init kswapd_init(void)
 }
 
 module_init(kswapd_init)
+
+
+/*
+ * Try to free up some pages from this zone through reclaim.
+ */
+int zone_reclaim(struct zone *zone, unsigned int gfp_mask, unsigned int order)
+{
+	struct scan_control sc;
+	int nr_pages = 1 << order;
+	int total_reclaimed = 0;
+
+	/* The reclaim may sleep, so don't do it if sleep isn't allowed */
+	if (!(gfp_mask & __GFP_WAIT))
+		return 0;
+	if (zone->all_unreclaimable)
+		return 0;
+
+	sc.gfp_mask = gfp_mask;
+	sc.may_writepage = 0;
+	sc.may_swap = 0;
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	/* scan at the highest priority */
+	sc.priority = 0;
+
+	if (nr_pages > SWAP_CLUSTER_MAX)
+		sc.swap_cluster_max = nr_pages;
+	else
+		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+
+	shrink_zone(zone, &sc);
+	total_reclaimed = sc.nr_reclaimed;
+
+	return total_reclaimed;
+}
+
+asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone,
+				     unsigned int state)
+{
+	struct zone *z;
+	int i;
+
+	if (node >= MAX_NUMNODES || !node_online(node))
+		return -EINVAL;
+
+	/* This will break if we ever add more zones */
+	if (!(zone & (1<<ZONE_DMA|1<<ZONE_NORMAL|1<<ZONE_HIGHMEM)))
+		return -EINVAL;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		if (!(zone & 1<<i))
+			continue;
+
+		z = &NODE_DATA(node)->node_zones[i];
+
+		if (state)
+			z->reclaim_pages = 1;
+		else
+			z->reclaim_pages = 0;
+	}
+
+	return 0;
+}
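
For completeness, a hypothetical user-space caller of the new interface might look like the sketch below. There is no libc wrapper, so it goes through syscall(2); the number is the i386 value added in include/asm-i386/unistd.h above (251; ia64 uses 1276), and the ZONE_NORMAL index is assumed to be the 2.6-era value from include/linux/mmzone.h. As in sys_set_zone_reclaim() above, the zone argument is a bitmask of zone indices and state is 1 to enable early reclaim, 0 to disable it.

/*
 * Hypothetical user-space sketch: enable early reclaim for ZONE_NORMAL
 * on node 0.  Assumes the i386 syscall number from this patch and the
 * 2.6-era zone index; adjust both for other architectures/kernels.
 * Build: cc -o set_zone_reclaim set_zone_reclaim.c
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_set_zone_reclaim
#define __NR_set_zone_reclaim	251	/* i386 value added above */
#endif

#define ZONE_NORMAL		1	/* assumed 2.6-era zone index */

int main(void)
{
	unsigned int node = 0;
	unsigned int zone_mask = 1 << ZONE_NORMAL;
	unsigned int state = 1;		/* 1 = reclaim early, 0 = turn it off */
	long ret;

	ret = syscall(__NR_set_zone_reclaim, node, zone_mask, state);
	if (ret < 0) {
		perror("set_zone_reclaim");
		return 1;
	}
	return 0;
}
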