 fs/fs-writeback.c                | 102
 include/linux/backing-dev.h      |  15
 include/trace/events/writeback.h |   5
 mm/backing-dev.c                 | 255
 4 files changed, 65 insertions(+), 312 deletions(-)
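This commit replaces the bdi forker/flusher kthread pool with a single unbound workqueue: every bdi's flusher becomes a delayed_work embedded in struct bdi_writeback, and one workqueue, bdi_wq, serves them all. Before the diff itself, a minimal sketch of the delayed_work pattern the patch adopts; the names example_wq, example_flusher and example_workfn are illustrative stand-ins, not kernel symbols:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *example_wq;	/* stands in for bdi_wq */

struct example_flusher {
	struct delayed_work dwork;
	/* per-device writeback state would live here */
};

static void example_workfn(struct work_struct *work)
{
	/* recover the containing struct from the embedded delayed_work */
	struct example_flusher *f = container_of(to_delayed_work(work),
						 struct example_flusher, dwork);

	/* ... one round of writeback for @f would go here ... */

	/* re-arm for the next period; the old kthread slept in a loop instead */
	queue_delayed_work(example_wq, &f->dwork, msecs_to_jiffies(5000));
}

static int example_setup(struct example_flusher *f)
{
	/*
	 * WQ_UNBOUND: workers are not pinned to a CPU, like the old
	 * per-bdi kthreads.  WQ_MEM_RECLAIM guarantees a rescuer thread
	 * so writeback can make progress under memory pressure.
	 */
	example_wq = alloc_workqueue("example_wb", WQ_MEM_RECLAIM |
				     WQ_FREEZABLE | WQ_UNBOUND, 0);
	if (!example_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&f->dwork, example_workfn);
	mod_delayed_work(example_wq, &f->dwork, 0);	/* kick it immediately */
	return 0;
}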
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 21f46fb3a101..8067d3719e94 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,7 +22,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
-static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
-{
-	if (bdi->wb.task) {
-		wake_up_process(bdi->wb.task);
-	} else {
-		/*
-		 * The bdi thread isn't there, wake up the forker thread which
-		 * will create and run it.
-		 */
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-}
-
 static void bdi_queue_work(struct backing_dev_info *bdi,
 			   struct wb_writeback_work *work)
 {
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	if (!bdi->wb.task)
-		trace_writeback_nothread(bdi, work);
-	bdi_wakeup_flusher(bdi);
 	spin_unlock_bh(&bdi->wb_lock);
+
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 static void
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task) {
-			trace_writeback_nowork(bdi);
-			wake_up_process(bdi->wb.task);
-		}
+		trace_writeback_nowork(bdi);
+		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 		return;
 	}
 
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 	 * writeback as soon as there is no other work to do.
 	 */
 	trace_writeback_wake_background(bdi);
-	spin_lock_bh(&bdi->wb_lock);
-	bdi_wakeup_flusher(bdi);
-	spin_unlock_bh(&bdi->wb_lock);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 /*
@@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 
 /*
  * Handle writeback of dirty data for the device backed by this bdi. Also
- * wakes up periodically and does kupdated style flushing.
+ * reschedules periodically and does kupdated style flushing.
  */
-int bdi_writeback_thread(void *data)
+void bdi_writeback_workfn(struct work_struct *work)
 {
-	struct bdi_writeback *wb = data;
+	struct bdi_writeback *wb = container_of(to_delayed_work(work),
						struct bdi_writeback, dwork);
 	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
 	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-	wb->last_active = jiffies;
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	trace_writeback_thread_start(bdi);
 
-	while (!kthread_freezable_should_stop(NULL)) {
+	if (likely(!current_is_workqueue_rescuer() ||
+		   list_empty(&bdi->bdi_list))) {
 		/*
-		 * Remove own delayed wake-up timer, since we are already awake
-		 * and we'll take care of the periodic write-back.
+		 * The normal path.  Keep writing back @bdi until its
+		 * work_list is empty.  Note that this path is also taken
+		 * if @bdi is shutting down even when we're running off the
+		 * rescuer as work_list needs to be drained.
 		 */
-		del_timer(&wb->wakeup_timer);
-
-		pages_written = wb_do_writeback(wb, 0);
-
+		do {
+			pages_written = wb_do_writeback(wb, 0);
+			trace_writeback_pages_written(pages_written);
+		} while (!list_empty(&bdi->work_list));
+	} else {
+		/*
+		 * bdi_wq can't get enough workers and we're running off
+		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
+		 * enough for efficient IO.
+		 */
+		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
+						    WB_REASON_FORKER_THREAD);
 		trace_writeback_pages_written(pages_written);
-
-		if (pages_written)
-			wb->last_active = jiffies;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
-			__set_current_state(TASK_RUNNING);
-			continue;
-		}
-
-		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
-			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-		else {
-			/*
-			 * We have nothing to do, so can go sleep without any
-			 * timeout and save power. When a work is queued or
-			 * something is made dirty - we will be woken up.
-			 */
-			schedule();
-		}
 	}
 
-	/* Flush any work that raced with us exiting */
-	if (!list_empty(&bdi->work_list))
-		wb_do_writeback(wb, 1);
+	if (!list_empty(&bdi->work_list) ||
+	    (wb_has_dirty_io(wb) && dirty_writeback_interval))
+		queue_delayed_work(bdi_wq, &wb->dwork,
+			msecs_to_jiffies(dirty_writeback_interval * 10));
 
-	trace_writeback_thread_stop(bdi);
-	return 0;
+	current->flags &= ~PF_SWAPWRITE;
 }
 
-
 /*
 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
 * the whole world.
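One behavioral subtlety in the new bdi_writeback_workfn() is worth calling out: with WQ_MEM_RECLAIM, the workqueue keeps a dedicated rescuer thread that executes work items when worker creation fails under memory pressure, and a work function can ask via current_is_workqueue_rescuer() whether it is running on it. A hedged sketch of that pattern follows; do_small_batch() and do_full_drain() are hypothetical stand-ins for the writeback calls in the real function, which additionally drains fully when the bdi is dying (signaled by an empty bdi_list):

#include <linux/workqueue.h>

/* hypothetical helpers standing in for writeback_inodes_wb() etc. */
static void do_small_batch(void) { }
static void do_full_drain(void) { }

static void rescuer_aware_workfn(struct work_struct *work)
{
	if (unlikely(current_is_workqueue_rescuer())) {
		/*
		 * Regular workers could not be created and we are running
		 * on the WQ_MEM_RECLAIM rescuer, the one thread that
		 * guarantees forward progress for the whole workqueue.
		 * Do a bounded batch and return instead of hogging it --
		 * mirroring the 1024-page cap in the hunk above.
		 */
		do_small_batch();
		return;
	}
	do_full_drain();
}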
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index a5ef27f5411a..c3881553f7d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
+#include <linux/workqueue.h>
 
 struct page;
 struct device;
@@ -27,7 +28,6 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pending,		/* On its way to being activated */
 	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
@@ -53,10 +53,8 @@ struct bdi_writeback {
 	unsigned int nr;
 
 	unsigned long last_old_flush;	/* last old data flush */
-	unsigned long last_active;	/* last time bdi thread was active */
 
-	struct task_struct *task;	/* writeback thread */
-	struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+	struct delayed_work dwork;	/* work item used for writeback */
 	struct list_head b_dirty;	/* dirty inodes */
 	struct list_head b_io;		/* parked for writeback */
 	struct list_head b_more_io;	/* parked for more writeback */
@@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_thread(void *data);
+void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
@@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
+extern struct workqueue_struct *bdi_wq;
+
 static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
 	return !list_empty(&wb->b_dirty) ||
@@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
 	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
 }
 
-static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
-{
-	return bdi == &default_backing_dev_info;
-}
-
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
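The struct change works because one delayed_work can express both states the old task-pointer/wakeup_timer pair encoded: mod_delayed_work() has mod_timer()-like semantics, queueing the work after the given delay if it is idle and simply re-timing it if it is already pending. A small sketch under that assumption; wq and dw are presumed set up elsewhere with alloc_workqueue() and INIT_DELAYED_WORK():

#include <linux/workqueue.h>

static inline void kick_now(struct workqueue_struct *wq,
			    struct delayed_work *dw)
{
	/*
	 * Delay 0 means "run as soon as a worker is free", which is what
	 * wake_up_process(bdi->wb.task) used to achieve.
	 */
	mod_delayed_work(wq, dw, 0);
}

static inline void kick_later(struct workqueue_struct *wq,
			      struct delayed_work *dw, unsigned long delay)
{
	/* replaces mod_timer(&wb->wakeup_timer, jiffies + delay) */
	mod_delayed_work(wq, dw, delay);
}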
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6a16fd2e70ed..464ea82e10db 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 DEFINE_EVENT(writeback_work_class, name, \
	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
	TP_ARGS(bdi, work))
-DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \
 
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
-DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
-DEFINE_WRITEBACK_EVENT(writeback_thread_start);
-DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
 
 DECLARE_EVENT_CLASS(wbc_class,
	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 657569b3fcf6..2857d4f6bca4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -37,6 +37,9 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
+
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
	if (wb1 < wb2) {
@@ -255,6 +258,11 @@ static int __init default_bdi_init(void)
 {
	int err;
 
+	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					      WQ_UNBOUND, 0);
+	if (!bdi_wq)
+		return -ENOMEM;
+
	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void wakeup_timer_fn(unsigned long data)
-{
-	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-
-	spin_lock_bh(&bdi->wb_lock);
-	if (bdi->wb.task) {
-		trace_writeback_wake_thread(bdi);
-		wake_up_process(bdi->wb.task);
-	} else if (bdi->dev) {
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		trace_writeback_wake_forker_thread(bdi);
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-	spin_unlock_bh(&bdi->wb_lock);
-}
-
 /*
  * This function is used when the first inode for this bdi is marked dirty. It
  * wakes-up the corresponding bdi thread which should then take care of the
@@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
	unsigned long timeout;
 
	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
-}
-
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-	return max(5UL * 60 * HZ, interval);
-}
-
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-}
-
-static int bdi_forker_thread(void *ptr)
-{
-	struct bdi_writeback *me = ptr;
-
-	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	for (;;) {
-		struct task_struct *task = NULL;
-		struct backing_dev_info *bdi;
-		enum {
-			NO_ACTION,   /* Nothing to do */
-			FORK_THREAD, /* Fork bdi thread */
-			KILL_THREAD, /* Kill inactive bdi thread */
-		} action = NO_ACTION;
-
-		/*
-		 * Temporary measure, we want to make sure we don't see
-		 * dirty data on the default backing_dev_info
-		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-			del_timer(&me->wakeup_timer);
-			wb_do_writeback(me, 0);
-		}
-
-		spin_lock_bh(&bdi_lock);
-		/*
-		 * In the following loop we are going to check whether we have
-		 * some work to do without any synchronization with tasks
-		 * waking us up to do work for them. Set the task state here
-		 * so that we don't miss wakeups after verifying conditions.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry(bdi, &bdi_list, bdi_list) {
-			bool have_dirty_io;
-
-			if (!bdi_cap_writeback_dirty(bdi) ||
-			     bdi_cap_flush_forker(bdi))
-				continue;
-
-			WARN(!test_bit(BDI_registered, &bdi->state),
-			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
-
-			have_dirty_io = !list_empty(&bdi->work_list) ||
-					wb_has_dirty_io(&bdi->wb);
-
-			/*
-			 * If the bdi has work to do, but the thread does not
-			 * exist - create it.
-			 */
-			if (!bdi->wb.task && have_dirty_io) {
-				/*
-				 * Set the pending bit - if someone will try to
-				 * unregister this bdi - it'll wait on this bit.
-				 */
-				set_bit(BDI_pending, &bdi->state);
-				action = FORK_THREAD;
-				break;
-			}
-
-			spin_lock(&bdi->wb_lock);
-
-			/*
-			 * If there is no work to do and the bdi thread was
-			 * inactive long enough - kill it. The wb_lock is taken
-			 * to make sure no-one adds more work to this bdi and
-			 * wakes the bdi thread up.
-			 */
-			if (bdi->wb.task && !have_dirty_io &&
-			    time_after(jiffies, bdi->wb.last_active +
-						bdi_longest_inactive())) {
-				task = bdi->wb.task;
-				bdi->wb.task = NULL;
-				spin_unlock(&bdi->wb_lock);
-				set_bit(BDI_pending, &bdi->state);
-				action = KILL_THREAD;
-				break;
-			}
-			spin_unlock(&bdi->wb_lock);
-		}
-		spin_unlock_bh(&bdi_lock);
-
-		/* Keep working if default bdi still has things to do */
-		if (!list_empty(&me->bdi->work_list))
-			__set_current_state(TASK_RUNNING);
-
-		switch (action) {
-		case FORK_THREAD:
-			__set_current_state(TASK_RUNNING);
-			task = kthread_create(bdi_writeback_thread, &bdi->wb,
-					      "flush-%s", dev_name(bdi->dev));
-			if (IS_ERR(task)) {
-				/*
-				 * If thread creation fails, force writeout of
-				 * the bdi from the thread. Hopefully 1024 is
-				 * large enough for efficient IO.
-				 */
-				writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
-			} else {
-				/*
-				 * The spinlock makes sure we do not lose
-				 * wake-ups when racing with 'bdi_queue_work()'.
-				 * And as soon as the bdi thread is visible, we
-				 * can start it.
-				 */
-				spin_lock_bh(&bdi->wb_lock);
-				bdi->wb.task = task;
-				spin_unlock_bh(&bdi->wb_lock);
-				wake_up_process(task);
-			}
-			bdi_clear_pending(bdi);
-			break;
-
-		case KILL_THREAD:
-			__set_current_state(TASK_RUNNING);
-			kthread_stop(task);
-			bdi_clear_pending(bdi);
-			break;
-
-		case NO_ACTION:
-			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-				/*
-				 * There are no dirty data. The only thing we
-				 * should now care about is checking for
-				 * inactive bdi threads and killing them. Thus,
-				 * let's sleep for longer time, save energy and
-				 * be friendly for battery-driven devices.
-				 */
-				schedule_timeout(bdi_longest_inactive());
-			else
-				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-			try_to_freeze();
-			break;
-		}
-	}
-
-	return 0;
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
 }
 
 /*
@@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
	spin_unlock_bh(&bdi_lock);
 
	synchronize_rcu_expedited();
+
+	/* bdi_list is now unused, clear it to mark @bdi dying */
+	INIT_LIST_HEAD(&bdi->bdi_list);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
	bdi->dev = dev;
 
-	/*
-	 * Just start the forker thread for our default backing_dev_info,
-	 * and add other bdi's to the list. They will get a thread created
-	 * on-demand when they need it.
-	 */
-	if (bdi_cap_flush_forker(bdi)) {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-				       dev_name(dev));
-		if (IS_ERR(wb->task))
-			return PTR_ERR(wb->task);
-	}
-
	bdi_debug_register(bdi, dev_name(dev));
	set_bit(BDI_registered, &bdi->state);
 
@@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct task_struct *task;
-
	if (!bdi_cap_writeback_dirty(bdi))
		return;
 
@@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
	bdi_remove_from_list(bdi);
 
	/*
-	 * If setup is pending, wait for that to complete first
+	 * Drain work list and shutdown the delayed_work.  At this point,
+	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+	 * is dying and its work_list needs to be drained no matter what.
	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-			TASK_UNINTERRUPTIBLE);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+	flush_delayed_work(&bdi->wb.dwork);
+	WARN_ON(!list_empty(&bdi->work_list));
 
	/*
-	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * This shouldn't be necessary unless @bdi for some reason has
+	 * unflushed dirty IO after work_list is drained.  Do it anyway
+	 * just in case.
	 */
-	spin_lock_bh(&bdi->wb_lock);
-	task = bdi->wb.task;
-	bdi->wb.task = NULL;
-	spin_unlock_bh(&bdi->wb_lock);
-
-	if (task)
-		kthread_stop(task);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 }
 
 /*
@@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
		bdi_set_min_ratio(bdi, 0);
		trace_writeback_bdi_unregister(bdi);
		bdi_prune_sb(bdi);
-		del_timer_sync(&bdi->wb.wakeup_timer);
 
-		if (!bdi_cap_flush_forker(bdi))
-			bdi_wb_shutdown(bdi);
+		bdi_wb_shutdown(bdi);
		bdi_debug_unregister(bdi);
 
		spin_lock_bh(&bdi->wb_lock);
@@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	spin_lock_init(&wb->list_lock);
-	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 
 /*
@@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
	bdi_unregister(bdi);
 
	/*
-	 * If bdi_unregister() had already been called earlier, the
-	 * wakeup_timer could still be armed because bdi_prune_sb()
-	 * can race with the bdi_wakeup_thread_delayed() calls from
-	 * __mark_inode_dirty().
+	 * If bdi_unregister() had already been called earlier, the dwork
+	 * could still be pending because bdi_prune_sb() can race with the
+	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
	 */
-	del_timer_sync(&bdi->wb.wakeup_timer);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 
	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);
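The teardown in bdi_wb_shutdown() above depends on the ordering of three workqueue primitives. A sketch of that ordering, reusing the illustrative example_flusher type and example_wq from the first sketch:

static void example_shutdown(struct example_flusher *f)
{
	/*
	 * Force one final, immediate run so that anything already queued
	 * is processed rather than silently discarded...
	 */
	mod_delayed_work(example_wq, &f->dwork, 0);

	/*
	 * ...and wait for that run to finish.  flush_delayed_work() kicks
	 * a pending timer onto the queue first, so it also covers a work
	 * item that was armed but not yet runnable.
	 */
	flush_delayed_work(&f->dwork);

	/*
	 * The work function may have re-armed itself for periodic
	 * flushing; cancel_delayed_work_sync() kills any such straggler
	 * and waits for a concurrent execution to complete.
	 */
	cancel_delayed_work_sync(&f->dwork);
}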
