diff options
author | Andrew Morton <akpm@osdl.org> | 2006-10-20 02:28:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-20 13:26:35 -0400 |
commit | 3fcfab16c5b86eaa3db3a9a31adba550c5b67141 (patch) | |
tree | bd348fa081b8fbec2c79fbf8f173a306d70b2b2c /mm | |
parent | 79e2de4bc53d7ca2a8eedee49e4a92479b4b530e (diff) |
[PATCH] separate bdi congestion functions from queue congestion functions
Separate out the concept of "queue congestion" from "backing-dev congestion".
Congestion is a backing-dev concept, not a queue concept.
The blk_* congestion functions are retained, as wrappers around the core
backing-dev congestion functions.
This proper layering is needed so that NFS can cleanly use the congestion
functions, and so that CONFIG_BLOCK=n actually links.
Cc: "Thomas Maier" <balagi@justmail.de>
Cc: "Jens Axboe" <jens.axboe@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: David Howells <dhowells@redhat.com>
Cc: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 3 | ||||
-rw-r--r-- | mm/backing-dev.c | 69 | ||||
-rw-r--r-- | mm/page-writeback.c | 17 | ||||
-rw-r--r-- | mm/page_alloc.c | 5 | ||||
-rw-r--r-- | mm/shmem.c | 3 | ||||
-rw-r--r-- | mm/vmscan.c | 6 |
6 files changed, 83 insertions, 20 deletions
diff --git a/mm/Makefile b/mm/Makefile index 12b3a4eee88d..f3c077eb0b8e 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ | |||
10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | 10 | obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ |
11 | page_alloc.o page-writeback.o pdflush.o \ | 11 | page_alloc.o page-writeback.o pdflush.o \ |
12 | readahead.o swap.o truncate.o vmscan.o \ | 12 | readahead.o swap.o truncate.o vmscan.o \ |
13 | prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
14 | $(mmu-y) | ||
14 | 15 | ||
15 | ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) | 16 | ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) |
16 | obj-y += bounce.o | 17 | obj-y += bounce.o |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c new file mode 100644 index 000000000000..f50a2811f9dc --- /dev/null +++ b/mm/backing-dev.c | |||
@@ -0,0 +1,69 @@ | |||
1 | |||
2 | #include <linux/wait.h> | ||
3 | #include <linux/backing-dev.h> | ||
4 | #include <linux/fs.h> | ||
5 | #include <linux/sched.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | static wait_queue_head_t congestion_wqh[2] = { | ||
9 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), | ||
10 | __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) | ||
11 | }; | ||
12 | |||
13 | |||
14 | void clear_bdi_congested(struct backing_dev_info *bdi, int rw) | ||
15 | { | ||
16 | enum bdi_state bit; | ||
17 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
18 | |||
19 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | ||
20 | clear_bit(bit, &bdi->state); | ||
21 | smp_mb__after_clear_bit(); | ||
22 | if (waitqueue_active(wqh)) | ||
23 | wake_up(wqh); | ||
24 | } | ||
25 | EXPORT_SYMBOL(clear_bdi_congested); | ||
26 | |||
27 | void set_bdi_congested(struct backing_dev_info *bdi, int rw) | ||
28 | { | ||
29 | enum bdi_state bit; | ||
30 | |||
31 | bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; | ||
32 | set_bit(bit, &bdi->state); | ||
33 | } | ||
34 | EXPORT_SYMBOL(set_bdi_congested); | ||
35 | |||
36 | /** | ||
37 | * congestion_wait - wait for a backing_dev to become uncongested | ||
38 | * @rw: READ or WRITE | ||
39 | * @timeout: timeout in jiffies | ||
40 | * | ||
41 | * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit | ||
42 | * write congestion. If no backing_devs are congested then just wait for the | ||
43 | * next write to be completed. | ||
44 | */ | ||
45 | long congestion_wait(int rw, long timeout) | ||
46 | { | ||
47 | long ret; | ||
48 | DEFINE_WAIT(wait); | ||
49 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
50 | |||
51 | prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); | ||
52 | ret = io_schedule_timeout(timeout); | ||
53 | finish_wait(wqh, &wait); | ||
54 | return ret; | ||
55 | } | ||
56 | EXPORT_SYMBOL(congestion_wait); | ||
57 | |||
58 | /** | ||
59 | * congestion_end - wake up sleepers on a congested backing_dev_info | ||
60 | * @rw: READ or WRITE | ||
61 | */ | ||
62 | void congestion_end(int rw) | ||
63 | { | ||
64 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
65 | |||
66 | if (waitqueue_active(wqh)) | ||
67 | wake_up(wqh); | ||
68 | } | ||
69 | EXPORT_SYMBOL(congestion_end); | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a0f339057449..8d9b19f239c3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping) | |||
222 | if (pages_written >= write_chunk) | 222 | if (pages_written >= write_chunk) |
223 | break; /* We've done our duty */ | 223 | break; /* We've done our duty */ |
224 | } | 224 | } |
225 | blk_congestion_wait(WRITE, HZ/10); | 225 | congestion_wait(WRITE, HZ/10); |
226 | } | 226 | } |
227 | 227 | ||
228 | if (nr_reclaimable + global_page_state(NR_WRITEBACK) | 228 | if (nr_reclaimable + global_page_state(NR_WRITEBACK) |
@@ -314,7 +314,7 @@ void throttle_vm_writeout(void) | |||
314 | if (global_page_state(NR_UNSTABLE_NFS) + | 314 | if (global_page_state(NR_UNSTABLE_NFS) + |
315 | global_page_state(NR_WRITEBACK) <= dirty_thresh) | 315 | global_page_state(NR_WRITEBACK) <= dirty_thresh) |
316 | break; | 316 | break; |
317 | blk_congestion_wait(WRITE, HZ/10); | 317 | congestion_wait(WRITE, HZ/10); |
318 | } | 318 | } |
319 | } | 319 | } |
320 | 320 | ||
@@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages) | |||
351 | min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 351 | min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
352 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | 352 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { |
353 | /* Wrote less than expected */ | 353 | /* Wrote less than expected */ |
354 | blk_congestion_wait(WRITE, HZ/10); | 354 | congestion_wait(WRITE, HZ/10); |
355 | if (!wbc.encountered_congestion) | 355 | if (!wbc.encountered_congestion) |
356 | break; | 356 | break; |
357 | } | 357 | } |
@@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg) | |||
422 | writeback_inodes(&wbc); | 422 | writeback_inodes(&wbc); |
423 | if (wbc.nr_to_write > 0) { | 423 | if (wbc.nr_to_write > 0) { |
424 | if (wbc.encountered_congestion) | 424 | if (wbc.encountered_congestion) |
425 | blk_congestion_wait(WRITE, HZ/10); | 425 | congestion_wait(WRITE, HZ/10); |
426 | else | 426 | else |
427 | break; /* All the old data is written */ | 427 | break; /* All the old data is written */ |
428 | } | 428 | } |
@@ -956,15 +956,6 @@ int test_set_page_writeback(struct page *page) | |||
956 | EXPORT_SYMBOL(test_set_page_writeback); | 956 | EXPORT_SYMBOL(test_set_page_writeback); |
957 | 957 | ||
958 | /* | 958 | /* |
959 | * Wakes up tasks that are being throttled due to writeback congestion | ||
960 | */ | ||
961 | void writeback_congestion_end(void) | ||
962 | { | ||
963 | blk_congestion_end(WRITE); | ||
964 | } | ||
965 | EXPORT_SYMBOL(writeback_congestion_end); | ||
966 | |||
967 | /* | ||
968 | * Return true if any of the pages in the mapping are marged with the | 959 | * Return true if any of the pages in the mapping are marged with the |
969 | * passed tag. | 960 | * passed tag. |
970 | */ | 961 | */ |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40db96a655d0..afee38f04d84 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/stop_machine.h> | 39 | #include <linux/stop_machine.h> |
40 | #include <linux/sort.h> | 40 | #include <linux/sort.h> |
41 | #include <linux/pfn.h> | 41 | #include <linux/pfn.h> |
42 | #include <linux/backing-dev.h> | ||
42 | 43 | ||
43 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
44 | #include <asm/div64.h> | 45 | #include <asm/div64.h> |
@@ -1050,7 +1051,7 @@ nofail_alloc: | |||
1050 | if (page) | 1051 | if (page) |
1051 | goto got_pg; | 1052 | goto got_pg; |
1052 | if (gfp_mask & __GFP_NOFAIL) { | 1053 | if (gfp_mask & __GFP_NOFAIL) { |
1053 | blk_congestion_wait(WRITE, HZ/50); | 1054 | congestion_wait(WRITE, HZ/50); |
1054 | goto nofail_alloc; | 1055 | goto nofail_alloc; |
1055 | } | 1056 | } |
1056 | } | 1057 | } |
@@ -1113,7 +1114,7 @@ rebalance: | |||
1113 | do_retry = 1; | 1114 | do_retry = 1; |
1114 | } | 1115 | } |
1115 | if (do_retry) { | 1116 | if (do_retry) { |
1116 | blk_congestion_wait(WRITE, HZ/50); | 1117 | congestion_wait(WRITE, HZ/50); |
1117 | goto rebalance; | 1118 | goto rebalance; |
1118 | } | 1119 | } |
1119 | 1120 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index b378f66cf2f9..4959535fc14c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/ctype.h> | 48 | #include <linux/ctype.h> |
49 | #include <linux/migrate.h> | 49 | #include <linux/migrate.h> |
50 | #include <linux/highmem.h> | 50 | #include <linux/highmem.h> |
51 | #include <linux/backing-dev.h> | ||
51 | 52 | ||
52 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
53 | #include <asm/div64.h> | 54 | #include <asm/div64.h> |
@@ -1131,7 +1132,7 @@ repeat: | |||
1131 | page_cache_release(swappage); | 1132 | page_cache_release(swappage); |
1132 | if (error == -ENOMEM) { | 1133 | if (error == -ENOMEM) { |
1133 | /* let kswapd refresh zone for GFP_ATOMICs */ | 1134 | /* let kswapd refresh zone for GFP_ATOMICs */ |
1134 | blk_congestion_wait(WRITE, HZ/50); | 1135 | congestion_wait(WRITE, HZ/50); |
1135 | } | 1136 | } |
1136 | goto repeat; | 1137 | goto repeat; |
1137 | } | 1138 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index af73c14f9d88..f05527bf792b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1059,7 +1059,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
1059 | 1059 | ||
1060 | /* Take a nap, wait for some writeback to complete */ | 1060 | /* Take a nap, wait for some writeback to complete */ |
1061 | if (sc.nr_scanned && priority < DEF_PRIORITY - 2) | 1061 | if (sc.nr_scanned && priority < DEF_PRIORITY - 2) |
1062 | blk_congestion_wait(WRITE, HZ/10); | 1062 | congestion_wait(WRITE, HZ/10); |
1063 | } | 1063 | } |
1064 | /* top priority shrink_caches still had more to do? don't OOM, then */ | 1064 | /* top priority shrink_caches still had more to do? don't OOM, then */ |
1065 | if (!sc.all_unreclaimable) | 1065 | if (!sc.all_unreclaimable) |
@@ -1214,7 +1214,7 @@ scan: | |||
1214 | * another pass across the zones. | 1214 | * another pass across the zones. |
1215 | */ | 1215 | */ |
1216 | if (total_scanned && priority < DEF_PRIORITY - 2) | 1216 | if (total_scanned && priority < DEF_PRIORITY - 2) |
1217 | blk_congestion_wait(WRITE, HZ/10); | 1217 | congestion_wait(WRITE, HZ/10); |
1218 | 1218 | ||
1219 | /* | 1219 | /* |
1220 | * We do this so kswapd doesn't build up large priorities for | 1220 | * We do this so kswapd doesn't build up large priorities for |
@@ -1458,7 +1458,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
1458 | goto out; | 1458 | goto out; |
1459 | 1459 | ||
1460 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) | 1460 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) |
1461 | blk_congestion_wait(WRITE, HZ / 10); | 1461 | congestion_wait(WRITE, HZ / 10); |
1462 | } | 1462 | } |
1463 | 1463 | ||
1464 | lru_pages = 0; | 1464 | lru_pages = 0; |