aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2010-07-25 07:29:22 -0400
committerJens Axboe <jaxboe@fusionio.com>2010-08-07 12:53:56 -0400
commit6467716a37673e8d47b4984eb19839bdad0a8353 (patch)
tree8b2bfd38d53e31c47654162d4ce9220c4424a31d
parent253c34e9b10c30d3064be654b5b78fbc1a8b1896 (diff)
writeback: optimize periodic bdi thread wakeups
When the first inode for a bdi is marked dirty, we wake up the bdi thread which should take care of the periodic background write-out. However, the write-out will actually start only 'dirty_writeback_interval' centisecs later, so we can delay the wake-up. This change was requested by Nick Piggin who pointed out that if we delay the wake-up, we weed out 2 unnecessary context switches, which matters because '__mark_inode_dirty()' is a hot-path function. This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which sets up a timer to wake up the bdi thread and returns. So the wake-up is delayed. We also delete the timer in bdi threads just before writing-back. And synchronously delete it when unregistering bdi. At the unregister point the bdi does not have any users, so no one can arm it again. Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes this change as well. This patch also moves the 'bdi_wb_init()' function down in the file to avoid forward-declaration of 'bdi_wakeup_thread_delayed()'. Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r--fs/fs-writeback.c36
-rw-r--r--include/linux/backing-dev.h2
-rw-r--r--mm/backing-dev.c73
3 files changed, 70 insertions, 41 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 55f6e46e06f1..bfa2df2c7ce2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -76,7 +76,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
76{ 76{
77 trace_writeback_queue(bdi, work); 77 trace_writeback_queue(bdi, work);
78 78
79 spin_lock(&bdi->wb_lock); 79 spin_lock_bh(&bdi->wb_lock);
80 list_add_tail(&work->list, &bdi->work_list); 80 list_add_tail(&work->list, &bdi->work_list);
81 if (bdi->wb.task) { 81 if (bdi->wb.task) {
82 wake_up_process(bdi->wb.task); 82 wake_up_process(bdi->wb.task);
@@ -88,7 +88,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
88 trace_writeback_nothread(bdi, work); 88 trace_writeback_nothread(bdi, work);
89 wake_up_process(default_backing_dev_info.wb.task); 89 wake_up_process(default_backing_dev_info.wb.task);
90 } 90 }
91 spin_unlock(&bdi->wb_lock); 91 spin_unlock_bh(&bdi->wb_lock);
92} 92}
93 93
94static void 94static void
@@ -704,13 +704,13 @@ get_next_work_item(struct backing_dev_info *bdi)
704{ 704{
705 struct wb_writeback_work *work = NULL; 705 struct wb_writeback_work *work = NULL;
706 706
707 spin_lock(&bdi->wb_lock); 707 spin_lock_bh(&bdi->wb_lock);
708 if (!list_empty(&bdi->work_list)) { 708 if (!list_empty(&bdi->work_list)) {
709 work = list_entry(bdi->work_list.next, 709 work = list_entry(bdi->work_list.next,
710 struct wb_writeback_work, list); 710 struct wb_writeback_work, list);
711 list_del_init(&work->list); 711 list_del_init(&work->list);
712 } 712 }
713 spin_unlock(&bdi->wb_lock); 713 spin_unlock_bh(&bdi->wb_lock);
714 return work; 714 return work;
715} 715}
716 716
@@ -810,6 +810,12 @@ int bdi_writeback_thread(void *data)
810 trace_writeback_thread_start(bdi); 810 trace_writeback_thread_start(bdi);
811 811
812 while (!kthread_should_stop()) { 812 while (!kthread_should_stop()) {
813 /*
814 * Remove own delayed wake-up timer, since we are already awake
815 * and we'll take care of the periodic write-back.
816 */
817 del_timer(&wb->wakeup_timer);
818
813 pages_written = wb_do_writeback(wb, 0); 819 pages_written = wb_do_writeback(wb, 0);
814 820
815 trace_writeback_pages_written(pages_written); 821 trace_writeback_pages_written(pages_written);
@@ -868,26 +874,6 @@ void wakeup_flusher_threads(long nr_pages)
868 rcu_read_unlock(); 874 rcu_read_unlock();
869} 875}
870 876
871/*
872 * This function is used when the first inode for this bdi is marked dirty. It
873 * wakes-up the corresponding bdi thread which should then take care of the
874 * periodic background write-out of dirty inodes.
875 */
876static void wakeup_bdi_thread(struct backing_dev_info *bdi)
877{
878 spin_lock(&bdi->wb_lock);
879 if (bdi->wb.task)
880 wake_up_process(bdi->wb.task);
881 else
882 /*
883 * When bdi tasks are inactive for long time, they are killed.
884 * In this case we have to wake-up the forker thread which
885 * should create and run the bdi thread.
886 */
887 wake_up_process(default_backing_dev_info.wb.task);
888 spin_unlock(&bdi->wb_lock);
889}
890
891static noinline void block_dump___mark_inode_dirty(struct inode *inode) 877static noinline void block_dump___mark_inode_dirty(struct inode *inode)
892{ 878{
893 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 879 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1019,7 +1005,7 @@ out:
1019 spin_unlock(&inode_lock); 1005 spin_unlock(&inode_lock);
1020 1006
1021 if (wakeup_bdi) 1007 if (wakeup_bdi)
1022 wakeup_bdi_thread(bdi); 1008 bdi_wakeup_thread_delayed(bdi);
1023} 1009}
1024EXPORT_SYMBOL(__mark_inode_dirty); 1010EXPORT_SYMBOL(__mark_inode_dirty);
1025 1011
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 71b6223e0a77..7628219e5386 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -52,6 +52,7 @@ struct bdi_writeback {
52 unsigned long last_active; /* last time bdi thread was active */ 52 unsigned long last_active; /* last time bdi thread was active */
53 53
54 struct task_struct *task; /* writeback thread */ 54 struct task_struct *task; /* writeback thread */
55 struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
55 struct list_head b_dirty; /* dirty inodes */ 56 struct list_head b_dirty; /* dirty inodes */
56 struct list_head b_io; /* parked for writeback */ 57 struct list_head b_io; /* parked for writeback */
57 struct list_head b_more_io; /* parked for more writeback */ 58 struct list_head b_more_io; /* parked for more writeback */
@@ -105,6 +106,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi);
105int bdi_writeback_thread(void *data); 106int bdi_writeback_thread(void *data);
106int bdi_has_dirty_io(struct backing_dev_info *bdi); 107int bdi_has_dirty_io(struct backing_dev_info *bdi);
107void bdi_arm_supers_timer(void); 108void bdi_arm_supers_timer(void);
109void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
108 110
109extern spinlock_t bdi_lock; 111extern spinlock_t bdi_lock;
110extern struct list_head bdi_list; 112extern struct list_head bdi_list;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a9a08d88a745..cfff7225138c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -248,17 +248,6 @@ static int __init default_bdi_init(void)
248} 248}
249subsys_initcall(default_bdi_init); 249subsys_initcall(default_bdi_init);
250 250
251static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
252{
253 memset(wb, 0, sizeof(*wb));
254
255 wb->bdi = bdi;
256 wb->last_old_flush = jiffies;
257 INIT_LIST_HEAD(&wb->b_dirty);
258 INIT_LIST_HEAD(&wb->b_io);
259 INIT_LIST_HEAD(&wb->b_more_io);
260}
261
262int bdi_has_dirty_io(struct backing_dev_info *bdi) 251int bdi_has_dirty_io(struct backing_dev_info *bdi)
263{ 252{
264 return wb_has_dirty_io(&bdi->wb); 253 return wb_has_dirty_io(&bdi->wb);
@@ -316,6 +305,43 @@ static void sync_supers_timer_fn(unsigned long unused)
316 bdi_arm_supers_timer(); 305 bdi_arm_supers_timer();
317} 306}
318 307
308static void wakeup_timer_fn(unsigned long data)
309{
310 struct backing_dev_info *bdi = (struct backing_dev_info *)data;
311
312 spin_lock_bh(&bdi->wb_lock);
313 if (bdi->wb.task) {
314 wake_up_process(bdi->wb.task);
315 } else {
316 /*
317 * When bdi tasks are inactive for long time, they are killed.
318 * In this case we have to wake-up the forker thread which
319 * should create and run the bdi thread.
320 */
321 wake_up_process(default_backing_dev_info.wb.task);
322 }
323 spin_unlock_bh(&bdi->wb_lock);
324}
325
326/*
327 * This function is used when the first inode for this bdi is marked dirty. It
328 * wakes-up the corresponding bdi thread which should then take care of the
329 * periodic background write-out of dirty inodes. Since the write-out would
330 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
331 * set up a timer which wakes the bdi thread up later.
332 *
333 * Note, we wouldn't bother setting up the timer, but this function is on the
334 * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
335 * by delaying the wake-up.
336 */
337void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
338{
339 unsigned long timeout;
340
341 timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
342 mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
343}
344
319/* 345/*
320 * Calculate the longest interval (jiffies) bdi threads are allowed to be 346 * Calculate the longest interval (jiffies) bdi threads are allowed to be
321 * inactive. 347 * inactive.
@@ -353,8 +379,10 @@ static int bdi_forker_thread(void *ptr)
353 * Temporary measure, we want to make sure we don't see 379 * Temporary measure, we want to make sure we don't see
354 * dirty data on the default backing_dev_info 380 * dirty data on the default backing_dev_info
355 */ 381 */
356 if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) 382 if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
383 del_timer(&me->wakeup_timer);
357 wb_do_writeback(me, 0); 384 wb_do_writeback(me, 0);
385 }
358 386
359 spin_lock_bh(&bdi_lock); 387 spin_lock_bh(&bdi_lock);
360 set_current_state(TASK_INTERRUPTIBLE); 388 set_current_state(TASK_INTERRUPTIBLE);
@@ -386,7 +414,7 @@ static int bdi_forker_thread(void *ptr)
386 break; 414 break;
387 } 415 }
388 416
389 spin_lock(&bdi->wb_lock); 417 spin_lock_bh(&bdi->wb_lock);
390 /* 418 /*
391 * If there is no work to do and the bdi thread was 419 * If there is no work to do and the bdi thread was
392 * inactive long enough - kill it. The wb_lock is taken 420 * inactive long enough - kill it. The wb_lock is taken
@@ -403,7 +431,7 @@ static int bdi_forker_thread(void *ptr)
403 action = KILL_THREAD; 431 action = KILL_THREAD;
404 break; 432 break;
405 } 433 }
406 spin_unlock(&bdi->wb_lock); 434 spin_unlock_bh(&bdi->wb_lock);
407 } 435 }
408 spin_unlock_bh(&bdi_lock); 436 spin_unlock_bh(&bdi_lock);
409 437
@@ -427,9 +455,9 @@ static int bdi_forker_thread(void *ptr)
427 * The spinlock makes sure we do not lose 455 * The spinlock makes sure we do not lose
428 * wake-ups when racing with 'bdi_queue_work()'. 456 * wake-ups when racing with 'bdi_queue_work()'.
429 */ 457 */
430 spin_lock(&bdi->wb_lock); 458 spin_lock_bh(&bdi->wb_lock);
431 bdi->wb.task = task; 459 bdi->wb.task = task;
432 spin_unlock(&bdi->wb_lock); 460 spin_unlock_bh(&bdi->wb_lock);
433 } 461 }
434 break; 462 break;
435 463
@@ -586,6 +614,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
586 if (bdi->dev) { 614 if (bdi->dev) {
587 trace_writeback_bdi_unregister(bdi); 615 trace_writeback_bdi_unregister(bdi);
588 bdi_prune_sb(bdi); 616 bdi_prune_sb(bdi);
617 del_timer_sync(&bdi->wb.wakeup_timer);
589 618
590 if (!bdi_cap_flush_forker(bdi)) 619 if (!bdi_cap_flush_forker(bdi))
591 bdi_wb_shutdown(bdi); 620 bdi_wb_shutdown(bdi);
@@ -596,6 +625,18 @@ void bdi_unregister(struct backing_dev_info *bdi)
596} 625}
597EXPORT_SYMBOL(bdi_unregister); 626EXPORT_SYMBOL(bdi_unregister);
598 627
628static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
629{
630 memset(wb, 0, sizeof(*wb));
631
632 wb->bdi = bdi;
633 wb->last_old_flush = jiffies;
634 INIT_LIST_HEAD(&wb->b_dirty);
635 INIT_LIST_HEAD(&wb->b_io);
636 INIT_LIST_HEAD(&wb->b_more_io);
637 setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
638}
639
599int bdi_init(struct backing_dev_info *bdi) 640int bdi_init(struct backing_dev_info *bdi)
600{ 641{
601 int i, err; 642 int i, err;