author		Linus Torvalds <torvalds@linux-foundation.org>	2009-09-11 12:17:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-11 12:17:05 -0400
commit		a12e4d304ce701844c639541d90df86e165d03f9 (patch)
tree		6ad7314b63a3303d9aa36f1c7eeb68abf64d3592 /mm
parent		89af571ca633ada14d17746519a179553a732d31 (diff)
parent		500b067c5e6ceea49cf280a02597b1169320e08c (diff)
Merge branch 'writeback' of git://git.kernel.dk/linux-2.6-block
* 'writeback' of git://git.kernel.dk/linux-2.6-block:
writeback: check for registered bdi in flusher add and inode dirty
writeback: add name to backing_dev_info
writeback: add some debug inode list counters to bdi stats
writeback: get rid of pdflush completely
writeback: switch to per-bdi threads for flushing data
writeback: move dirty inodes from super_block to backing_dev_info
writeback: get rid of generic_sync_sb_inodes() export
Diffstat (limited to 'mm')
-rw-r--r--	mm/Makefile		|   2
-rw-r--r--	mm/backing-dev.c	| 381
-rw-r--r--	mm/page-writeback.c	| 182
-rw-r--r--	mm/pdflush.c		| 269
-rw-r--r--	mm/swap_state.c		|   1
-rw-r--r--	mm/vmscan.c		|   2
6 files changed, 405 insertions, 432 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 5e0bd6426693..147a7a7873c4 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,7 +8,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
 			   vmalloc.o
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
-			   maccess.o page_alloc.o page-writeback.o pdflush.o \
+			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
 			   page_isolation.o mm_init.o $(mmu-y)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c86edd244294..d3ca0dac1111 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,8 +1,11 @@
 
 #include <linux/wait.h>
 #include <linux/backing-dev.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -14,6 +17,7 @@ void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
+	.name		= "default",
 	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
@@ -22,6 +26,18 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+DEFINE_SPINLOCK(bdi_lock);
+LIST_HEAD(bdi_list);
+LIST_HEAD(bdi_pending_list);
+
+static struct task_struct *sync_supers_tsk;
+static struct timer_list sync_supers_timer;
+
+static int bdi_sync_supers(void *);
+static void sync_supers_timer_fn(unsigned long);
+static void arm_supers_timer(void);
+
+static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -37,9 +53,29 @@ static void bdi_debug_init(void)
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
+	struct bdi_writeback *wb;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
+	struct inode *inode;
+
+	/*
+	 * inode lock is enough here, the bdi->wb_list is protected by
+	 * RCU on the reader side
+	 */
+	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
+	spin_lock(&inode_lock);
+	list_for_each_entry(wb, &bdi->wb_list, list) {
+		nr_wb++;
+		list_for_each_entry(inode, &wb->b_dirty, i_list)
+			nr_dirty++;
+		list_for_each_entry(inode, &wb->b_io, i_list)
+			nr_io++;
+		list_for_each_entry(inode, &wb->b_more_io, i_list)
+			nr_more_io++;
+	}
+	spin_unlock(&inode_lock);
 
 	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
 
@@ -49,12 +85,22 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "BdiReclaimable:   %8lu kB\n"
 		   "BdiDirtyThresh:   %8lu kB\n"
 		   "DirtyThresh:      %8lu kB\n"
-		   "BackgroundThresh: %8lu kB\n",
+		   "BackgroundThresh: %8lu kB\n"
+		   "WriteBack threads:%8lu\n"
+		   "b_dirty:          %8lu\n"
+		   "b_io:             %8lu\n"
+		   "b_more_io:        %8lu\n"
+		   "bdi_list:         %8u\n"
+		   "state:            %8lx\n"
+		   "wb_mask:          %8lx\n"
+		   "wb_list:          %8u\n"
+		   "wb_cnt:           %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
-		   K(bdi_thresh),
-		   K(dirty_thresh),
-		   K(background_thresh));
+		   K(bdi_thresh), K(dirty_thresh),
+		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
+		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
+		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
 #undef K
 
 	return 0;
@@ -185,6 +231,13 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
+	BUG_ON(IS_ERR(sync_supers_tsk));
+
+	init_timer(&sync_supers_timer);
+	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
+	arm_supers_timer();
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -193,6 +246,248 @@ static int __init default_bdi_init(void)
 }
 subsys_initcall(default_bdi_init);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	memset(wb, 0, sizeof(*wb));
+
+	wb->bdi = bdi;
+	wb->last_old_flush = jiffies;
+	INIT_LIST_HEAD(&wb->b_dirty);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_more_io);
+}
+
+static void bdi_task_init(struct backing_dev_info *bdi,
+			  struct bdi_writeback *wb)
+{
+	struct task_struct *tsk = current;
+
+	spin_lock(&bdi->wb_lock);
+	list_add_tail_rcu(&wb->list, &bdi->wb_list);
+	spin_unlock(&bdi->wb_lock);
+
+	tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(tsk, 0);
+}
+
+static int bdi_start_fn(void *ptr)
+{
+	struct bdi_writeback *wb = ptr;
+	struct backing_dev_info *bdi = wb->bdi;
+	int ret;
+
+	/*
+	 * Add us to the active bdi_list
+	 */
+	spin_lock(&bdi_lock);
+	list_add(&bdi->bdi_list, &bdi_list);
+	spin_unlock(&bdi_lock);
+
+	bdi_task_init(bdi, wb);
+
+	/*
+	 * Clear pending bit and wakeup anybody waiting to tear us down
+	 */
+	clear_bit(BDI_pending, &bdi->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&bdi->state, BDI_pending);
+
+	ret = bdi_writeback_task(wb);
+
+	/*
+	 * Remove us from the list
+	 */
+	spin_lock(&bdi->wb_lock);
+	list_del_rcu(&wb->list);
+	spin_unlock(&bdi->wb_lock);
+
+	/*
+	 * Flush any work that raced with us exiting. No new work
+	 * will be added, since this bdi isn't discoverable anymore.
+	 */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	wb->task = NULL;
+	return ret;
+}
+
+int bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	return wb_has_dirty_io(&bdi->wb);
+}
+
+static void bdi_flush_io(struct backing_dev_info *bdi)
+{
+	struct writeback_control wbc = {
+		.bdi			= bdi,
+		.sync_mode		= WB_SYNC_NONE,
+		.older_than_this	= NULL,
+		.range_cyclic		= 1,
+		.nr_to_write		= 1024,
+	};
+
+	writeback_inodes_wbc(&wbc);
+}
+
+/*
+ * kupdated() used to do this. We cannot do it from the bdi_forker_task()
+ * or we risk deadlocking on ->s_umount. The longer term solution would be
+ * to implement sync_supers_bdi() or similar and simply do it from the
+ * bdi writeback tasks individually.
+ */
+static int bdi_sync_supers(void *unused)
+{
+	set_user_nice(current, 0);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+
+		/*
+		 * Do this periodically, like kupdated() did before.
+		 */
+		sync_supers();
+	}
+
+	return 0;
+}
+
+static void arm_supers_timer(void)
+{
+	unsigned long next;
+
+	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
+	mod_timer(&sync_supers_timer, round_jiffies_up(next));
+}
+
+static void sync_supers_timer_fn(unsigned long unused)
+{
+	wake_up_process(sync_supers_tsk);
+	arm_supers_timer();
+}
+
+static int bdi_forker_task(void *ptr)
+{
+	struct bdi_writeback *me = ptr;
+
+	bdi_task_init(me->bdi, me);
+
+	for (;;) {
+		struct backing_dev_info *bdi, *tmp;
+		struct bdi_writeback *wb;
+
+		/*
+		 * Temporary measure, we want to make sure we don't see
+		 * dirty data on the default backing_dev_info
+		 */
+		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
+			wb_do_writeback(me, 0);
+
+		spin_lock(&bdi_lock);
+
+		/*
+		 * Check if any existing bdi's have dirty data without
+		 * a thread registered. If so, set that up.
+		 */
+		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
+			if (bdi->wb.task)
+				continue;
+			if (list_empty(&bdi->work_list) &&
+			    !bdi_has_dirty_io(bdi))
+				continue;
+
+			bdi_add_default_flusher_task(bdi);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (list_empty(&bdi_pending_list)) {
+			unsigned long wait;
+
+			spin_unlock(&bdi_lock);
+			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
+			schedule_timeout(wait);
+			try_to_freeze();
+			continue;
+		}
+
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * This is our real job - check for pending entries in
+		 * bdi_pending_list, and create the tasks that got added
+		 */
+		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
+				 bdi_list);
+		list_del_init(&bdi->bdi_list);
+		spin_unlock(&bdi_lock);
+
+		wb = &bdi->wb;
+		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
+					dev_name(bdi->dev));
+		/*
+		 * If task creation fails, then readd the bdi to
+		 * the pending list and force writeout of the bdi
+		 * from this forker thread. That will free some memory
+		 * and we can try again.
+		 */
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+
+			/*
+			 * Add this 'bdi' to the back, so we get
+			 * a chance to flush other bdi's to free
+			 * memory.
+			 */
+			spin_lock(&bdi_lock);
+			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+			spin_unlock(&bdi_lock);
+
+			bdi_flush_io(bdi);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add the default flusher task that gets created for any bdi
+ * that has dirty data pending writeout
+ */
+void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
+{
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
+		printk(KERN_ERR "bdi %p/%s is not registered!\n",
+							bdi, bdi->name);
+		return;
+	}
+
+	/*
+	 * Check with the helper whether to proceed adding a task. Will only
+	 * abort if we two or more simultanous calls to
+	 * bdi_add_default_flusher_task() occured, further additions will block
+	 * waiting for previous additions to finish.
+	 */
+	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
+		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+
+		/*
+		 * We are now on the pending list, wake up bdi_forker_task()
+		 * to finish the job and add us back to the active bdi_list
+		 */
+		wake_up_process(default_backing_dev_info.wb.task);
+	}
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...)
 {
@@ -211,9 +506,35 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		goto exit;
 	}
 
+	spin_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_list);
+	spin_unlock(&bdi_lock);
+
 	bdi->dev = dev;
-	bdi_debug_register(bdi, dev_name(dev));
 
+	/*
+	 * Just start the forker thread for our default backing_dev_info,
+	 * and add other bdi's to the list. They will get a thread created
+	 * on-demand when they need it.
+	 */
+	if (bdi_cap_flush_forker(bdi)) {
+		struct bdi_writeback *wb = &bdi->wb;
+
+		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
+						dev_name(dev));
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+			ret = -ENOMEM;
+
+			spin_lock(&bdi_lock);
+			list_del(&bdi->bdi_list);
+			spin_unlock(&bdi_lock);
+			goto exit;
+		}
+	}
+
+	bdi_debug_register(bdi, dev_name(dev));
+	set_bit(BDI_registered, &bdi->state);
 exit:
 	return ret;
 }
@@ -225,9 +546,42 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
 }
 EXPORT_SYMBOL(bdi_register_dev);
 
+/*
+ * Remove bdi from the global list and shutdown any threads we have running
+ */
+static void bdi_wb_shutdown(struct backing_dev_info *bdi)
+{
+	struct bdi_writeback *wb;
+
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * If setup is pending, wait for that to complete first
+	 */
+	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+			TASK_UNINTERRUPTIBLE);
+
+	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
+	spin_lock(&bdi_lock);
+	list_del(&bdi->bdi_list);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * Finally, kill the kernel threads. We don't need to be RCU
+	 * safe anymore, since the bdi is gone from visibility.
+	 */
+	list_for_each_entry(wb, &bdi->wb_list, list)
+		kthread_stop(wb->task);
+}
+
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		if (!bdi_cap_flush_forker(bdi))
+			bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -237,14 +591,25 @@ EXPORT_SYMBOL(bdi_unregister);
 
 int bdi_init(struct backing_dev_info *bdi)
 {
-	int i;
-	int err;
+	int i, err;
 
 	bdi->dev = NULL;
 
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
+	spin_lock_init(&bdi->wb_lock);
+	INIT_LIST_HEAD(&bdi->bdi_list);
+	INIT_LIST_HEAD(&bdi->wb_list);
+	INIT_LIST_HEAD(&bdi->work_list);
+
+	bdi_wb_init(&bdi->wb, bdi);
+
+	/*
+	 * Just one thread support for now, hard code mask and count
+	 */
+	bdi->wb_mask = 1;
+	bdi->wb_cnt = 1;
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -269,6 +634,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
+	WARN_ON(bdi_has_dirty_io(bdi));
+
 	bdi_unregister(bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627ebcd313..25e7770309b8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -117,8 +108,6 @@ EXPORT_SYMBOL(laptop_mode);
 /* End of sysctl-exported parameters */
 
 
-static void background_writeout(unsigned long _min_pages);
-
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
  *
@@ -320,15 +309,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
  *
  */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -340,27 +327,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
@@ -546,7 +532,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes(&wbc);
+			writeback_inodes_wbc(&wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
@@ -575,7 +561,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 		if (pages_written >= write_chunk)
 			break;		/* We've done our duty */
 
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+		schedule_timeout(1);
 	}
 
 	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@ static void balance_dirty_pages(struct address_space *mapping)
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-	    (!laptop_mode && (global_page_state(NR_FILE_DIRTY)
-			       + global_page_state(NR_UNSTABLE_NFS)
-			       > background_thresh)))
-		pdflush_operation(background_writeout, 0);
+	    (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+					  + global_page_state(NR_UNSTABLE_NFS))
+					  > background_thresh))) {
+		struct writeback_control wbc = {
+			.bdi		= bdi,
+			.sync_mode	= WB_SYNC_NONE,
+			.nr_to_write	= nr_writeback,
+		};
+
+
+		bdi_start_writeback(&wbc);
+	}
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -681,153 +675,35 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 	}
 }
 
-/*
- * writeback at least _min_pages, and keep writing until the amount of dirty
- * memory is less than the background threshold, or until we're all clean.
- */
-static void background_writeout(unsigned long _min_pages)
-{
-	long min_pages = _min_pages;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.range_cyclic	= 1,
-	};
-
-	for ( ; ; ) {
-		unsigned long background_thresh;
-		unsigned long dirty_thresh;
-
-		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-		if (global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) < background_thresh
-				&& min_pages <= 0)
-			break;
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		wbc.pages_skipped = 0;
-		writeback_inodes(&wbc);
-		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			/* Wrote less than expected */
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;
-		}
-	}
-}
-
-/*
- * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
- * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
- * -1 if all pdflush threads were busy.
- */
-int wakeup_pdflush(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	return pdflush_operation(background_writeout, nr_pages);
-}
-
-static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
 static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
- * Periodic writeback of "old" data.
- *
- * Define "old": the first time one of an inode's pages is dirtied, we mark the
- * dirtying-time in the inode's address_space.  So this periodic writeback code
- * just walks the superblock inode list, writing back any inodes which are
- * older than a specific point in time.
- *
- * Try to run once per dirty_writeback_interval.  But if a writeback event
- * takes longer than a dirty_writeback_interval interval, then leave a
- * one-second gap.
- *
- * older_than_this takes precedence over nr_to_write.  So we'll only write back
- * all dirty pages if they are all attached to "old" mappings.
- */
-static void wb_kupdate(unsigned long arg)
-{
-	unsigned long oldest_jif;
-	unsigned long start_jif;
-	unsigned long next_jif;
-	long nr_to_write;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = &oldest_jif,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.for_kupdate	= 1,
-		.range_cyclic	= 1,
-	};
-
-	sync_supers();
-
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-	start_jif = jiffies;
-	next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
-	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-	while (nr_to_write > 0) {
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		writeback_inodes(&wbc);
-		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;	/* All the old data is written */
-		}
-		nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-	}
-	if (time_before(next_jif, jiffies + HZ))
-		next_jif = jiffies + HZ;
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, next_jif);
-}
-
-/*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, jiffies +
-			msecs_to_jiffies(dirty_writeback_interval * 10));
-	else
-		del_timer(&wb_timer);
 	return 0;
 }
 
-static void wb_timer_fn(unsigned long unused)
+static void do_laptop_sync(struct work_struct *work)
 {
-	if (pdflush_operation(wb_kupdate, 0) < 0)
-		mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
-}
-
-static void laptop_flush(unsigned long unused)
-{
-	sys_sync();
+	wakeup_flusher_threads(0);
+	kfree(work);
 }
 
 static void laptop_timer_fn(unsigned long unused)
 {
-	pdflush_operation(laptop_flush, 0);
+	struct work_struct *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work) {
+		INIT_WORK(work, do_laptop_sync);
+		schedule_work(work);
+	}
 }
 
 /*
@@ -910,8 +786,6 @@ void __init page_writeback_init(void)
 {
 	int shift;
 
-	mod_timer(&wb_timer,
-		  jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 
diff --git a/mm/pdflush.c b/mm/pdflush.c
deleted file mode 100644
index 235ac440c44e..000000000000
--- a/mm/pdflush.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * mm/pdflush.c - worker threads for writing back filesystem data
- *
- * Copyright (C) 2002, Linus Torvalds.
- *
- * 09Apr2002	Andrew Morton
- *		Initial version
- * 29Feb2004	kaos@sgi.com
- *		Move worker thread creation to kthread to avoid chewing
- *		up stack space with nested calls to kernel_thread.
- */
-
-#include <linux/sched.h>
-#include <linux/list.h>
-#include <linux/signal.h>
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/fs.h>		/* Needed by writeback.h	  */
-#include <linux/writeback.h>	/* Prototypes pdflush_operation() */
-#include <linux/kthread.h>
-#include <linux/cpuset.h>
-#include <linux/freezer.h>
-
-
-/*
- * Minimum and maximum number of pdflush instances
- */
-#define MIN_PDFLUSH_THREADS	2
-#define MAX_PDFLUSH_THREADS	8
-
-static void start_one_pdflush_thread(void);
-
-
-/*
- * The pdflush threads are worker threads for writing back dirty data.
- * Ideally, we'd like one thread per active disk spindle.  But the disk
- * topology is very hard to divine at this level.   Instead, we take
- * care in various places to prevent more than one pdflush thread from
- * performing writeback against a single filesystem.  pdflush threads
- * have the PF_FLUSHER flag set in current->flags to aid in this.
- */
-
-/*
- * All the pdflush threads.  Protected by pdflush_lock
- */
-static LIST_HEAD(pdflush_list);
-static DEFINE_SPINLOCK(pdflush_lock);
-
-/*
- * The count of currently-running pdflush threads.  Protected
- * by pdflush_lock.
- *
- * Readable by sysctl, but not writable.  Published to userspace at
- * /proc/sys/vm/nr_pdflush_threads.
- */
-int nr_pdflush_threads = 0;
-
-/*
- * The time at which the pdflush thread pool last went empty
- */
-static unsigned long last_empty_jifs;
-
-/*
- * The pdflush thread.
- *
- * Thread pool management algorithm:
- *
- * - The minimum and maximum number of pdflush instances are bound
- *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
- *
- * - If there have been no idle pdflush instances for 1 second, create
- *   a new one.
- *
- * - If the least-recently-went-to-sleep pdflush thread has been asleep
- *   for more than one second, terminate a thread.
- */
-
-/*
- * A structure for passing work to a pdflush thread.  Also for passing
- * state information between pdflush threads.  Protected by pdflush_lock.
- */
-struct pdflush_work {
-	struct task_struct *who;	/* The thread */
-	void (*fn)(unsigned long);	/* A callback function */
-	unsigned long arg0;		/* An argument to the callback */
-	struct list_head list;		/* On pdflush_list, when idle */
-	unsigned long when_i_went_to_sleep;
-};
-
-static int __pdflush(struct pdflush_work *my_work)
-{
-	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
-	set_freezable();
-	my_work->fn = NULL;
-	my_work->who = current;
-	INIT_LIST_HEAD(&my_work->list);
-
-	spin_lock_irq(&pdflush_lock);
-	for ( ; ; ) {
-		struct pdflush_work *pdf;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		list_move(&my_work->list, &pdflush_list);
-		my_work->when_i_went_to_sleep = jiffies;
-		spin_unlock_irq(&pdflush_lock);
-		schedule();
-		try_to_freeze();
-		spin_lock_irq(&pdflush_lock);
-		if (!list_empty(&my_work->list)) {
-			/*
-			 * Someone woke us up, but without removing our control
-			 * structure from the global list.  swsusp will do this
-			 * in try_to_freeze()->refrigerator().  Handle it.
-			 */
-			my_work->fn = NULL;
-			continue;
-		}
-		if (my_work->fn == NULL) {
-			printk("pdflush: bogus wakeup\n");
-			continue;
-		}
-		spin_unlock_irq(&pdflush_lock);
-
-		(*my_work->fn)(my_work->arg0);
-
-		spin_lock_irq(&pdflush_lock);
-
-		/*
-		 * Thread creation: For how long have there been zero
-		 * available threads?
-		 *
-		 * To throttle creation, we reset last_empty_jifs.
-		 */
-		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
-			if (list_empty(&pdflush_list)) {
-				if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) {
-					last_empty_jifs = jiffies;
-					nr_pdflush_threads++;
-					spin_unlock_irq(&pdflush_lock);
-					start_one_pdflush_thread();
-					spin_lock_irq(&pdflush_lock);
-				}
-			}
-		}
-
-		my_work->fn = NULL;
-
-		/*
-		 * Thread destruction: For how long has the sleepiest
-		 * thread slept?
-		 */
-		if (list_empty(&pdflush_list))
-			continue;
-		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
-			continue;
-		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
-		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
-			/* Limit exit rate */
-			pdf->when_i_went_to_sleep = jiffies;
-			break;					/* exeunt */
-		}
-	}
-	nr_pdflush_threads--;
-	spin_unlock_irq(&pdflush_lock);
-	return 0;
-}
-
-/*
- * Of course, my_work wants to be just a local in __pdflush().  It is
- * separated out in this manner to hopefully prevent the compiler from
- * performing unfortunate optimisations against the auto variables.  Because
- * these are visible to other tasks and CPUs.  (No problem has actually
- * been observed.  This is just paranoia).
- */
-static int pdflush(void *dummy)
-{
-	struct pdflush_work my_work;
-	cpumask_var_t cpus_allowed;
-
-	/*
-	 * Since the caller doesn't even check kthread_run() worked, let's not
-	 * freak out too much if this fails.
-	 */
-	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
-		printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
-		return 0;
-	}
-
-	/*
-	 * pdflush can spend a lot of time doing encryption via dm-crypt.  We
-	 * don't want to do that at keventd's priority.
-	 */
-	set_user_nice(current, 0);
-
-	/*
-	 * Some configs put our parent kthread in a limited cpuset,
-	 * which kthread() overrides, forcing cpus_allowed == cpu_all_mask.
-	 * Our needs are more modest - cut back to our cpusets cpus_allowed.
-	 * This is needed as pdflush's are dynamically created and destroyed.
-	 * The boottime pdflush's are easily placed w/o these 2 lines.
-	 */
-	cpuset_cpus_allowed(current, cpus_allowed);
-	set_cpus_allowed_ptr(current, cpus_allowed);
-	free_cpumask_var(cpus_allowed);
-
-	return __pdflush(&my_work);
-}
-
-/*
- * Attempt to wake up a pdflush thread, and get it to do some work for you.
- * Returns zero if it indeed managed to find a worker thread, and passed your
- * payload to it.
- */
-int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	BUG_ON(fn == NULL);	/* Hard to diagnose if it's deferred */
-
-	spin_lock_irqsave(&pdflush_lock, flags);
-	if (list_empty(&pdflush_list)) {
-		ret = -1;
-	} else {
-		struct pdflush_work *pdf;
-
-		pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
-		list_del_init(&pdf->list);
-		if (list_empty(&pdflush_list))
-			last_empty_jifs = jiffies;
-		pdf->fn = fn;
-		pdf->arg0 = arg0;
-		wake_up_process(pdf->who);
-	}
-	spin_unlock_irqrestore(&pdflush_lock, flags);
-
-	return ret;
-}
-
-static void start_one_pdflush_thread(void)
-{
-	struct task_struct *k;
-
-	k = kthread_run(pdflush, NULL, "pdflush");
-	if (unlikely(IS_ERR(k))) {
-		spin_lock_irq(&pdflush_lock);
-		nr_pdflush_threads--;
-		spin_unlock_irq(&pdflush_lock);
-	}
-}
-
-static int __init pdflush_init(void)
-{
-	int i;
-
-	/*
-	 * Pre-set nr_pdflush_threads...  If we fail to create,
-	 * the count will be decremented.
-	 */
-	nr_pdflush_threads = MIN_PDFLUSH_THREADS;
-
-	for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
-		start_one_pdflush_thread();
-	return 0;
-}
-
-module_init(pdflush_init);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 42cd38eba79f..5ae6b8b78c80 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -34,6 +34,7 @@ static const struct address_space_operations swap_aops = {
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
+	.name		= "swap",
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 94e86dd6954c..ba8228e0a806 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1720,7 +1720,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 */
 		if (total_scanned > sc->swap_cluster_max +
 					sc->swap_cluster_max / 2) {
-			wakeup_pdflush(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
 			sc->may_writepage = 1;
 		}
 