about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
author	Andrew Morton <akpm@osdl.org>	2006-10-20 02:28:16 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-10-20 13:26:35 -0400
commit	3fcfab16c5b86eaa3db3a9a31adba550c5b67141 (patch)
tree	bd348fa081b8fbec2c79fbf8f173a306d70b2b2c /mm
parent	79e2de4bc53d7ca2a8eedee49e4a92479b4b530e (diff)
[PATCH] separate bdi congestion functions from queue congestion functions
Separate out the concept of "queue congestion" from "backing-dev congestion".
Congestion is a backing-dev concept, not a queue concept.

The blk_* congestion functions are retained, as wrappers around the core
backing-dev congestion functions.

This proper layering is needed so that NFS can cleanly use the congestion
functions, and so that CONFIG_BLOCK=n actually links.

Cc: "Thomas Maier" <balagi@justmail.de>
Cc: "Jens Axboe" <jens.axboe@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Howells <dhowells@redhat.com>
Cc: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile3
-rw-r--r--mm/backing-dev.c69
-rw-r--r--mm/page-writeback.c17
-rw-r--r--mm/page_alloc.c5
-rw-r--r--mm/shmem.c3
-rw-r--r--mm/vmscan.c6
6 files changed, 83 insertions, 20 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 12b3a4eee88d..f3c077eb0b8e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
11 page_alloc.o page-writeback.o pdflush.o \ 11 page_alloc.o page-writeback.o pdflush.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
14 $(mmu-y)
14 15
15ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) 16ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
16obj-y += bounce.o 17obj-y += bounce.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
new file mode 100644
index 000000000000..f50a2811f9dc
--- /dev/null
+++ b/mm/backing-dev.c
@@ -0,0 +1,69 @@
1
2#include <linux/wait.h>
3#include <linux/backing-dev.h>
4#include <linux/fs.h>
5#include <linux/sched.h>
6#include <linux/module.h>
7
8static wait_queue_head_t congestion_wqh[2] = {
9 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
10 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
11 };
12
13
14void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
15{
16 enum bdi_state bit;
17 wait_queue_head_t *wqh = &congestion_wqh[rw];
18
19 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
20 clear_bit(bit, &bdi->state);
21 smp_mb__after_clear_bit();
22 if (waitqueue_active(wqh))
23 wake_up(wqh);
24}
25EXPORT_SYMBOL(clear_bdi_congested);
26
27void set_bdi_congested(struct backing_dev_info *bdi, int rw)
28{
29 enum bdi_state bit;
30
31 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
32 set_bit(bit, &bdi->state);
33}
34EXPORT_SYMBOL(set_bdi_congested);
35
36/**
37 * congestion_wait - wait for a backing_dev to become uncongested
38 * @rw: READ or WRITE
39 * @timeout: timeout in jiffies
40 *
41 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
42 * write congestion. If no backing_devs are congested then just wait for the
43 * next write to be completed.
44 */
45long congestion_wait(int rw, long timeout)
46{
47 long ret;
48 DEFINE_WAIT(wait);
49 wait_queue_head_t *wqh = &congestion_wqh[rw];
50
51 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
52 ret = io_schedule_timeout(timeout);
53 finish_wait(wqh, &wait);
54 return ret;
55}
56EXPORT_SYMBOL(congestion_wait);
57
58/**
59 * congestion_end - wake up sleepers on a congested backing_dev_info
60 * @rw: READ or WRITE
61 */
62void congestion_end(int rw)
63{
64 wait_queue_head_t *wqh = &congestion_wqh[rw];
65
66 if (waitqueue_active(wqh))
67 wake_up(wqh);
68}
69EXPORT_SYMBOL(congestion_end);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a0f339057449..8d9b19f239c3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping)
222 if (pages_written >= write_chunk) 222 if (pages_written >= write_chunk)
223 break; /* We've done our duty */ 223 break; /* We've done our duty */
224 } 224 }
225 blk_congestion_wait(WRITE, HZ/10); 225 congestion_wait(WRITE, HZ/10);
226 } 226 }
227 227
228 if (nr_reclaimable + global_page_state(NR_WRITEBACK) 228 if (nr_reclaimable + global_page_state(NR_WRITEBACK)
@@ -314,7 +314,7 @@ void throttle_vm_writeout(void)
314 if (global_page_state(NR_UNSTABLE_NFS) + 314 if (global_page_state(NR_UNSTABLE_NFS) +
315 global_page_state(NR_WRITEBACK) <= dirty_thresh) 315 global_page_state(NR_WRITEBACK) <= dirty_thresh)
316 break; 316 break;
317 blk_congestion_wait(WRITE, HZ/10); 317 congestion_wait(WRITE, HZ/10);
318 } 318 }
319} 319}
320 320
@@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages)
351 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; 351 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
352 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { 352 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
353 /* Wrote less than expected */ 353 /* Wrote less than expected */
354 blk_congestion_wait(WRITE, HZ/10); 354 congestion_wait(WRITE, HZ/10);
355 if (!wbc.encountered_congestion) 355 if (!wbc.encountered_congestion)
356 break; 356 break;
357 } 357 }
@@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg)
422 writeback_inodes(&wbc); 422 writeback_inodes(&wbc);
423 if (wbc.nr_to_write > 0) { 423 if (wbc.nr_to_write > 0) {
424 if (wbc.encountered_congestion) 424 if (wbc.encountered_congestion)
425 blk_congestion_wait(WRITE, HZ/10); 425 congestion_wait(WRITE, HZ/10);
426 else 426 else
427 break; /* All the old data is written */ 427 break; /* All the old data is written */
428 } 428 }
@@ -956,15 +956,6 @@ int test_set_page_writeback(struct page *page)
956EXPORT_SYMBOL(test_set_page_writeback); 956EXPORT_SYMBOL(test_set_page_writeback);
957 957
958/* 958/*
959 * Wakes up tasks that are being throttled due to writeback congestion
960 */
961void writeback_congestion_end(void)
962{
963 blk_congestion_end(WRITE);
964}
965EXPORT_SYMBOL(writeback_congestion_end);
966
967/*
968 * Return true if any of the pages in the mapping are marged with the 959 * Return true if any of the pages in the mapping are marged with the
969 * passed tag. 960 * passed tag.
970 */ 961 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40db96a655d0..afee38f04d84 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -39,6 +39,7 @@
39#include <linux/stop_machine.h> 39#include <linux/stop_machine.h>
40#include <linux/sort.h> 40#include <linux/sort.h>
41#include <linux/pfn.h> 41#include <linux/pfn.h>
42#include <linux/backing-dev.h>
42 43
43#include <asm/tlbflush.h> 44#include <asm/tlbflush.h>
44#include <asm/div64.h> 45#include <asm/div64.h>
@@ -1050,7 +1051,7 @@ nofail_alloc:
1050 if (page) 1051 if (page)
1051 goto got_pg; 1052 goto got_pg;
1052 if (gfp_mask & __GFP_NOFAIL) { 1053 if (gfp_mask & __GFP_NOFAIL) {
1053 blk_congestion_wait(WRITE, HZ/50); 1054 congestion_wait(WRITE, HZ/50);
1054 goto nofail_alloc; 1055 goto nofail_alloc;
1055 } 1056 }
1056 } 1057 }
@@ -1113,7 +1114,7 @@ rebalance:
1113 do_retry = 1; 1114 do_retry = 1;
1114 } 1115 }
1115 if (do_retry) { 1116 if (do_retry) {
1116 blk_congestion_wait(WRITE, HZ/50); 1117 congestion_wait(WRITE, HZ/50);
1117 goto rebalance; 1118 goto rebalance;
1118 } 1119 }
1119 1120
diff --git a/mm/shmem.c b/mm/shmem.c
index b378f66cf2f9..4959535fc14c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -48,6 +48,7 @@
48#include <linux/ctype.h> 48#include <linux/ctype.h>
49#include <linux/migrate.h> 49#include <linux/migrate.h>
50#include <linux/highmem.h> 50#include <linux/highmem.h>
51#include <linux/backing-dev.h>
51 52
52#include <asm/uaccess.h> 53#include <asm/uaccess.h>
53#include <asm/div64.h> 54#include <asm/div64.h>
@@ -1131,7 +1132,7 @@ repeat:
1131 page_cache_release(swappage); 1132 page_cache_release(swappage);
1132 if (error == -ENOMEM) { 1133 if (error == -ENOMEM) {
1133 /* let kswapd refresh zone for GFP_ATOMICs */ 1134 /* let kswapd refresh zone for GFP_ATOMICs */
1134 blk_congestion_wait(WRITE, HZ/50); 1135 congestion_wait(WRITE, HZ/50);
1135 } 1136 }
1136 goto repeat; 1137 goto repeat;
1137 } 1138 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index af73c14f9d88..f05527bf792b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1059,7 +1059,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1059 1059
1060 /* Take a nap, wait for some writeback to complete */ 1060 /* Take a nap, wait for some writeback to complete */
1061 if (sc.nr_scanned && priority < DEF_PRIORITY - 2) 1061 if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
1062 blk_congestion_wait(WRITE, HZ/10); 1062 congestion_wait(WRITE, HZ/10);
1063 } 1063 }
1064 /* top priority shrink_caches still had more to do? don't OOM, then */ 1064 /* top priority shrink_caches still had more to do? don't OOM, then */
1065 if (!sc.all_unreclaimable) 1065 if (!sc.all_unreclaimable)
@@ -1214,7 +1214,7 @@ scan:
1214 * another pass across the zones. 1214 * another pass across the zones.
1215 */ 1215 */
1216 if (total_scanned && priority < DEF_PRIORITY - 2) 1216 if (total_scanned && priority < DEF_PRIORITY - 2)
1217 blk_congestion_wait(WRITE, HZ/10); 1217 congestion_wait(WRITE, HZ/10);
1218 1218
1219 /* 1219 /*
1220 * We do this so kswapd doesn't build up large priorities for 1220 * We do this so kswapd doesn't build up large priorities for
@@ -1458,7 +1458,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1458 goto out; 1458 goto out;
1459 1459
1460 if (sc.nr_scanned && prio < DEF_PRIORITY - 2) 1460 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
1461 blk_congestion_wait(WRITE, HZ / 10); 1461 congestion_wait(WRITE, HZ / 10);
1462 } 1462 }
1463 1463
1464 lru_pages = 0; 1464 lru_pages = 0;