diff options
author | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2010-07-25 07:29:22 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-08-07 12:53:56 -0400 |
commit | 6467716a37673e8d47b4984eb19839bdad0a8353 (patch) | |
tree | 8b2bfd38d53e31c47654162d4ce9220c4424a31d | |
parent | 253c34e9b10c30d3064be654b5b78fbc1a8b1896 (diff) |
writeback: optimize periodic bdi thread wakeups
When the first inode for a bdi is marked dirty, we wake up the bdi thread which
should take care of the periodic background write-out. However, the write-out
will actually start only 'dirty_writeback_interval' centisecs later, so we can
delay the wake-up.
This change was requested by Nick Piggin who pointed out that if we delay the
wake-up, we weed out 2 unnecessary context switches, which matters because
'__mark_inode_dirty()' is a hot-path function.
This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which
sets up a timer to wake-up the bdi thread and returns. So the wake-up is
delayed.
We also delete the timer in bdi threads just before writing-back, and
synchronously delete it when unregistering the bdi. At the unregister point the bdi
does not have any users, so no one can arm it again.
Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq
context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes
this change as well.
This patch also moves the 'bdi_wb_init()' function down in the file to avoid
forward-declaration of 'bdi_wakeup_thread_delayed()'.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | fs/fs-writeback.c | 36 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 2 | ||||
-rw-r--r-- | mm/backing-dev.c | 73 |
3 files changed, 70 insertions, 41 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 55f6e46e06f1..bfa2df2c7ce2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -76,7 +76,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, | |||
76 | { | 76 | { |
77 | trace_writeback_queue(bdi, work); | 77 | trace_writeback_queue(bdi, work); |
78 | 78 | ||
79 | spin_lock(&bdi->wb_lock); | 79 | spin_lock_bh(&bdi->wb_lock); |
80 | list_add_tail(&work->list, &bdi->work_list); | 80 | list_add_tail(&work->list, &bdi->work_list); |
81 | if (bdi->wb.task) { | 81 | if (bdi->wb.task) { |
82 | wake_up_process(bdi->wb.task); | 82 | wake_up_process(bdi->wb.task); |
@@ -88,7 +88,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, | |||
88 | trace_writeback_nothread(bdi, work); | 88 | trace_writeback_nothread(bdi, work); |
89 | wake_up_process(default_backing_dev_info.wb.task); | 89 | wake_up_process(default_backing_dev_info.wb.task); |
90 | } | 90 | } |
91 | spin_unlock(&bdi->wb_lock); | 91 | spin_unlock_bh(&bdi->wb_lock); |
92 | } | 92 | } |
93 | 93 | ||
94 | static void | 94 | static void |
@@ -704,13 +704,13 @@ get_next_work_item(struct backing_dev_info *bdi) | |||
704 | { | 704 | { |
705 | struct wb_writeback_work *work = NULL; | 705 | struct wb_writeback_work *work = NULL; |
706 | 706 | ||
707 | spin_lock(&bdi->wb_lock); | 707 | spin_lock_bh(&bdi->wb_lock); |
708 | if (!list_empty(&bdi->work_list)) { | 708 | if (!list_empty(&bdi->work_list)) { |
709 | work = list_entry(bdi->work_list.next, | 709 | work = list_entry(bdi->work_list.next, |
710 | struct wb_writeback_work, list); | 710 | struct wb_writeback_work, list); |
711 | list_del_init(&work->list); | 711 | list_del_init(&work->list); |
712 | } | 712 | } |
713 | spin_unlock(&bdi->wb_lock); | 713 | spin_unlock_bh(&bdi->wb_lock); |
714 | return work; | 714 | return work; |
715 | } | 715 | } |
716 | 716 | ||
@@ -810,6 +810,12 @@ int bdi_writeback_thread(void *data) | |||
810 | trace_writeback_thread_start(bdi); | 810 | trace_writeback_thread_start(bdi); |
811 | 811 | ||
812 | while (!kthread_should_stop()) { | 812 | while (!kthread_should_stop()) { |
813 | /* | ||
814 | * Remove own delayed wake-up timer, since we are already awake | ||
815 | * and we'll take care of the periodic write-back. | ||
816 | */ | ||
817 | del_timer(&wb->wakeup_timer); | ||
818 | |||
813 | pages_written = wb_do_writeback(wb, 0); | 819 | pages_written = wb_do_writeback(wb, 0); |
814 | 820 | ||
815 | trace_writeback_pages_written(pages_written); | 821 | trace_writeback_pages_written(pages_written); |
@@ -868,26 +874,6 @@ void wakeup_flusher_threads(long nr_pages) | |||
868 | rcu_read_unlock(); | 874 | rcu_read_unlock(); |
869 | } | 875 | } |
870 | 876 | ||
871 | /* | ||
872 | * This function is used when the first inode for this bdi is marked dirty. It | ||
873 | * wakes-up the corresponding bdi thread which should then take care of the | ||
874 | * periodic background write-out of dirty inodes. | ||
875 | */ | ||
876 | static void wakeup_bdi_thread(struct backing_dev_info *bdi) | ||
877 | { | ||
878 | spin_lock(&bdi->wb_lock); | ||
879 | if (bdi->wb.task) | ||
880 | wake_up_process(bdi->wb.task); | ||
881 | else | ||
882 | /* | ||
883 | * When bdi tasks are inactive for long time, they are killed. | ||
884 | * In this case we have to wake-up the forker thread which | ||
885 | * should create and run the bdi thread. | ||
886 | */ | ||
887 | wake_up_process(default_backing_dev_info.wb.task); | ||
888 | spin_unlock(&bdi->wb_lock); | ||
889 | } | ||
890 | |||
891 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 877 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
892 | { | 878 | { |
893 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 879 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
@@ -1019,7 +1005,7 @@ out: | |||
1019 | spin_unlock(&inode_lock); | 1005 | spin_unlock(&inode_lock); |
1020 | 1006 | ||
1021 | if (wakeup_bdi) | 1007 | if (wakeup_bdi) |
1022 | wakeup_bdi_thread(bdi); | 1008 | bdi_wakeup_thread_delayed(bdi); |
1023 | } | 1009 | } |
1024 | EXPORT_SYMBOL(__mark_inode_dirty); | 1010 | EXPORT_SYMBOL(__mark_inode_dirty); |
1025 | 1011 | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 71b6223e0a77..7628219e5386 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -52,6 +52,7 @@ struct bdi_writeback { | |||
52 | unsigned long last_active; /* last time bdi thread was active */ | 52 | unsigned long last_active; /* last time bdi thread was active */ |
53 | 53 | ||
54 | struct task_struct *task; /* writeback thread */ | 54 | struct task_struct *task; /* writeback thread */ |
55 | struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ | ||
55 | struct list_head b_dirty; /* dirty inodes */ | 56 | struct list_head b_dirty; /* dirty inodes */ |
56 | struct list_head b_io; /* parked for writeback */ | 57 | struct list_head b_io; /* parked for writeback */ |
57 | struct list_head b_more_io; /* parked for more writeback */ | 58 | struct list_head b_more_io; /* parked for more writeback */ |
@@ -105,6 +106,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); | |||
105 | int bdi_writeback_thread(void *data); | 106 | int bdi_writeback_thread(void *data); |
106 | int bdi_has_dirty_io(struct backing_dev_info *bdi); | 107 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
107 | void bdi_arm_supers_timer(void); | 108 | void bdi_arm_supers_timer(void); |
109 | void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); | ||
108 | 110 | ||
109 | extern spinlock_t bdi_lock; | 111 | extern spinlock_t bdi_lock; |
110 | extern struct list_head bdi_list; | 112 | extern struct list_head bdi_list; |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a9a08d88a745..cfff7225138c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -248,17 +248,6 @@ static int __init default_bdi_init(void) | |||
248 | } | 248 | } |
249 | subsys_initcall(default_bdi_init); | 249 | subsys_initcall(default_bdi_init); |
250 | 250 | ||
251 | static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) | ||
252 | { | ||
253 | memset(wb, 0, sizeof(*wb)); | ||
254 | |||
255 | wb->bdi = bdi; | ||
256 | wb->last_old_flush = jiffies; | ||
257 | INIT_LIST_HEAD(&wb->b_dirty); | ||
258 | INIT_LIST_HEAD(&wb->b_io); | ||
259 | INIT_LIST_HEAD(&wb->b_more_io); | ||
260 | } | ||
261 | |||
262 | int bdi_has_dirty_io(struct backing_dev_info *bdi) | 251 | int bdi_has_dirty_io(struct backing_dev_info *bdi) |
263 | { | 252 | { |
264 | return wb_has_dirty_io(&bdi->wb); | 253 | return wb_has_dirty_io(&bdi->wb); |
@@ -316,6 +305,43 @@ static void sync_supers_timer_fn(unsigned long unused) | |||
316 | bdi_arm_supers_timer(); | 305 | bdi_arm_supers_timer(); |
317 | } | 306 | } |
318 | 307 | ||
308 | static void wakeup_timer_fn(unsigned long data) | ||
309 | { | ||
310 | struct backing_dev_info *bdi = (struct backing_dev_info *)data; | ||
311 | |||
312 | spin_lock_bh(&bdi->wb_lock); | ||
313 | if (bdi->wb.task) { | ||
314 | wake_up_process(bdi->wb.task); | ||
315 | } else { | ||
316 | /* | ||
317 | * When bdi tasks are inactive for long time, they are killed. | ||
318 | * In this case we have to wake-up the forker thread which | ||
319 | * should create and run the bdi thread. | ||
320 | */ | ||
321 | wake_up_process(default_backing_dev_info.wb.task); | ||
322 | } | ||
323 | spin_unlock_bh(&bdi->wb_lock); | ||
324 | } | ||
325 | |||
326 | /* | ||
327 | * This function is used when the first inode for this bdi is marked dirty. It | ||
328 | * wakes-up the corresponding bdi thread which should then take care of the | ||
329 | * periodic background write-out of dirty inodes. Since the write-out would | ||
330 | * start only 'dirty_writeback_interval' centisecs from now anyway, we just | ||
331 | * set up a timer which wakes the bdi thread up later. | ||
332 | * | ||
333 | * Note, we wouldn't bother setting up the timer, but this function is on the | ||
334 | * fast-path (used by '__mark_inode_dirty()'), so we save few context switches | ||
335 | * by delaying the wake-up. | ||
336 | */ | ||
337 | void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) | ||
338 | { | ||
339 | unsigned long timeout; | ||
340 | |||
341 | timeout = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
342 | mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); | ||
343 | } | ||
344 | |||
319 | /* | 345 | /* |
320 | * Calculate the longest interval (jiffies) bdi threads are allowed to be | 346 | * Calculate the longest interval (jiffies) bdi threads are allowed to be |
321 | * inactive. | 347 | * inactive. |
@@ -353,8 +379,10 @@ static int bdi_forker_thread(void *ptr) | |||
353 | * Temporary measure, we want to make sure we don't see | 379 | * Temporary measure, we want to make sure we don't see |
354 | * dirty data on the default backing_dev_info | 380 | * dirty data on the default backing_dev_info |
355 | */ | 381 | */ |
356 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) | 382 | if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { |
383 | del_timer(&me->wakeup_timer); | ||
357 | wb_do_writeback(me, 0); | 384 | wb_do_writeback(me, 0); |
385 | } | ||
358 | 386 | ||
359 | spin_lock_bh(&bdi_lock); | 387 | spin_lock_bh(&bdi_lock); |
360 | set_current_state(TASK_INTERRUPTIBLE); | 388 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -386,7 +414,7 @@ static int bdi_forker_thread(void *ptr) | |||
386 | break; | 414 | break; |
387 | } | 415 | } |
388 | 416 | ||
389 | spin_lock(&bdi->wb_lock); | 417 | spin_lock_bh(&bdi->wb_lock); |
390 | /* | 418 | /* |
391 | * If there is no work to do and the bdi thread was | 419 | * If there is no work to do and the bdi thread was |
392 | * inactive long enough - kill it. The wb_lock is taken | 420 | * inactive long enough - kill it. The wb_lock is taken |
@@ -403,7 +431,7 @@ static int bdi_forker_thread(void *ptr) | |||
403 | action = KILL_THREAD; | 431 | action = KILL_THREAD; |
404 | break; | 432 | break; |
405 | } | 433 | } |
406 | spin_unlock(&bdi->wb_lock); | 434 | spin_unlock_bh(&bdi->wb_lock); |
407 | } | 435 | } |
408 | spin_unlock_bh(&bdi_lock); | 436 | spin_unlock_bh(&bdi_lock); |
409 | 437 | ||
@@ -427,9 +455,9 @@ static int bdi_forker_thread(void *ptr) | |||
427 | * The spinlock makes sure we do not lose | 455 | * The spinlock makes sure we do not lose |
428 | * wake-ups when racing with 'bdi_queue_work()'. | 456 | * wake-ups when racing with 'bdi_queue_work()'. |
429 | */ | 457 | */ |
430 | spin_lock(&bdi->wb_lock); | 458 | spin_lock_bh(&bdi->wb_lock); |
431 | bdi->wb.task = task; | 459 | bdi->wb.task = task; |
432 | spin_unlock(&bdi->wb_lock); | 460 | spin_unlock_bh(&bdi->wb_lock); |
433 | } | 461 | } |
434 | break; | 462 | break; |
435 | 463 | ||
@@ -586,6 +614,7 @@ void bdi_unregister(struct backing_dev_info *bdi) | |||
586 | if (bdi->dev) { | 614 | if (bdi->dev) { |
587 | trace_writeback_bdi_unregister(bdi); | 615 | trace_writeback_bdi_unregister(bdi); |
588 | bdi_prune_sb(bdi); | 616 | bdi_prune_sb(bdi); |
617 | del_timer_sync(&bdi->wb.wakeup_timer); | ||
589 | 618 | ||
590 | if (!bdi_cap_flush_forker(bdi)) | 619 | if (!bdi_cap_flush_forker(bdi)) |
591 | bdi_wb_shutdown(bdi); | 620 | bdi_wb_shutdown(bdi); |
@@ -596,6 +625,18 @@ void bdi_unregister(struct backing_dev_info *bdi) | |||
596 | } | 625 | } |
597 | EXPORT_SYMBOL(bdi_unregister); | 626 | EXPORT_SYMBOL(bdi_unregister); |
598 | 627 | ||
628 | static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) | ||
629 | { | ||
630 | memset(wb, 0, sizeof(*wb)); | ||
631 | |||
632 | wb->bdi = bdi; | ||
633 | wb->last_old_flush = jiffies; | ||
634 | INIT_LIST_HEAD(&wb->b_dirty); | ||
635 | INIT_LIST_HEAD(&wb->b_io); | ||
636 | INIT_LIST_HEAD(&wb->b_more_io); | ||
637 | setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); | ||
638 | } | ||
639 | |||
599 | int bdi_init(struct backing_dev_info *bdi) | 640 | int bdi_init(struct backing_dev_info *bdi) |
600 | { | 641 | { |
601 | int i, err; | 642 | int i, err; |