Diffstat (limited to 'mm/backing-dev.c')
 mm/backing-dev.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 323 insertions(+), 18 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6f163e0f0509..7f3fa79f25c0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,8 +1,11 @@
 
 #include <linux/wait.h>
 #include <linux/backing-dev.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -22,8 +25,18 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
-DEFINE_MUTEX(bdi_lock);
+DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
+LIST_HEAD(bdi_pending_list);
+
+static struct task_struct *sync_supers_tsk;
+static struct timer_list sync_supers_timer;
+
+static int bdi_sync_supers(void *);
+static void sync_supers_timer_fn(unsigned long);
+static void arm_supers_timer(void);
+
+static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -187,6 +200,13 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
+	BUG_ON(IS_ERR(sync_supers_tsk));
+
+	init_timer(&sync_supers_timer);
+	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
+	arm_supers_timer();
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
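
The hunk above moves the old kupdated-style sync of super blocks into a dedicated thread woken by a self-rearming timer: sync_supers_timer_fn() runs in softirq context, where sync_supers() (which can block on ->s_umount) must not be called, so the timer only wakes sync_supers_tsk and the thread does the sleeping work. (Note that setup_timer() already initializes the timer, so the preceding init_timer() call is redundant, though harmless.) Below is a minimal, self-contained module sketch of this wake-a-kthread-from-a-timer pattern, using the same-era timer API; every demo_* name is hypothetical:

    #include <linux/init.h>
    #include <linux/kthread.h>
    #include <linux/module.h>
    #include <linux/sched.h>
    #include <linux/timer.h>

    static struct task_struct *demo_tsk;    /* hypothetical worker thread */
    static struct timer_list demo_timer;

    static void demo_timer_fn(unsigned long unused)
    {
            /* Softirq context: must not sleep, so only kick the thread. */
            wake_up_process(demo_tsk);
            mod_timer(&demo_timer, round_jiffies_up(jiffies + 5 * HZ));
    }

    static int demo_thread(void *unused)
    {
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();             /* sleep until the timer wakes us */
                    pr_info("demo: doing sleepable work\n");
            }
            return 0;
    }

    static int __init demo_init(void)
    {
            demo_tsk = kthread_run(demo_thread, NULL, "demo");
            if (IS_ERR(demo_tsk))
                    return PTR_ERR(demo_tsk);
            setup_timer(&demo_timer, demo_timer_fn, 0);
            mod_timer(&demo_timer, round_jiffies_up(jiffies + 5 * HZ));
            return 0;
    }

    static void __exit demo_exit(void)
    {
            del_timer_sync(&demo_timer);
            kthread_stop(demo_tsk);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");

Re-arming from inside the callback keeps exactly one timer in flight, and kthread_stop() both sets the stop flag and wakes the thread, so the loop exits cleanly.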
@@ -195,6 +215,242 @@ static int __init default_bdi_init(void)
 }
 subsys_initcall(default_bdi_init);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	memset(wb, 0, sizeof(*wb));
+
+	wb->bdi = bdi;
+	wb->last_old_flush = jiffies;
+	INIT_LIST_HEAD(&wb->b_dirty);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_more_io);
+}
+
+static void bdi_task_init(struct backing_dev_info *bdi,
+			  struct bdi_writeback *wb)
+{
+	struct task_struct *tsk = current;
+
+	spin_lock(&bdi->wb_lock);
+	list_add_tail_rcu(&wb->list, &bdi->wb_list);
+	spin_unlock(&bdi->wb_lock);
+
+	tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(tsk, 0);
+}
+
+static int bdi_start_fn(void *ptr)
+{
+	struct bdi_writeback *wb = ptr;
+	struct backing_dev_info *bdi = wb->bdi;
+	int ret;
+
+	/*
+	 * Add us to the active bdi_list
+	 */
+	spin_lock(&bdi_lock);
+	list_add(&bdi->bdi_list, &bdi_list);
+	spin_unlock(&bdi_lock);
+
+	bdi_task_init(bdi, wb);
+
+	/*
+	 * Clear pending bit and wake up anybody waiting to tear us down
+	 */
+	clear_bit(BDI_pending, &bdi->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&bdi->state, BDI_pending);
+
+	ret = bdi_writeback_task(wb);
+
+	/*
+	 * Remove us from the list
+	 */
+	spin_lock(&bdi->wb_lock);
+	list_del_rcu(&wb->list);
+	spin_unlock(&bdi->wb_lock);
+
+	/*
+	 * Flush any work that raced with us exiting. No new work
+	 * will be added, since this bdi isn't discoverable anymore.
+	 */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	wb->task = NULL;
+	return ret;
+}
+
+int bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	return wb_has_dirty_io(&bdi->wb);
+}
+
+static void bdi_flush_io(struct backing_dev_info *bdi)
+{
+	struct writeback_control wbc = {
+		.bdi			= bdi,
+		.sync_mode		= WB_SYNC_NONE,
+		.older_than_this	= NULL,
+		.range_cyclic		= 1,
+		.nr_to_write		= 1024,
+	};
+
+	writeback_inodes_wbc(&wbc);
+}
+
+/*
+ * kupdated() used to do this. We cannot do it from the bdi_forker_task()
+ * or we risk deadlocking on ->s_umount. The longer term solution would be
+ * to implement sync_supers_bdi() or similar and simply do it from the
+ * bdi writeback tasks individually.
+ */
+static int bdi_sync_supers(void *unused)
+{
+	set_user_nice(current, 0);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+
+		/*
+		 * Do this periodically, like kupdated() did before.
+		 */
+		sync_supers();
+	}
+
+	return 0;
+}
+
+static void arm_supers_timer(void)
+{
+	unsigned long next;
+
+	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
+	mod_timer(&sync_supers_timer, round_jiffies_up(next));
+}
+
+static void sync_supers_timer_fn(unsigned long unused)
+{
+	wake_up_process(sync_supers_tsk);
+	arm_supers_timer();
+}
+
+static int bdi_forker_task(void *ptr)
+{
+	struct bdi_writeback *me = ptr;
+
+	bdi_task_init(me->bdi, me);
+
+	for (;;) {
+		struct backing_dev_info *bdi, *tmp;
+		struct bdi_writeback *wb;
+
+		/*
+		 * Temporary measure: we want to make sure we don't see
+		 * dirty data on the default backing_dev_info
+		 */
+		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
+			wb_do_writeback(me, 0);
+
+		spin_lock(&bdi_lock);
+
+		/*
+		 * Check if any existing bdi's have dirty data without
+		 * a thread registered. If so, set that up.
+		 */
+		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
+			if (bdi->wb.task)
+				continue;
+			if (list_empty(&bdi->work_list) &&
+			    !bdi_has_dirty_io(bdi))
+				continue;
+
+			bdi_add_default_flusher_task(bdi);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (list_empty(&bdi_pending_list)) {
+			unsigned long wait;
+
+			spin_unlock(&bdi_lock);
+			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
+			schedule_timeout(wait);
+			try_to_freeze();
+			continue;
+		}
+
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * This is our real job - check for pending entries in
+		 * bdi_pending_list, and create the tasks that got added
+		 */
+		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
+				 bdi_list);
+		list_del_init(&bdi->bdi_list);
+		spin_unlock(&bdi_lock);
+
+		wb = &bdi->wb;
+		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
+					dev_name(bdi->dev));
+		/*
+		 * If task creation fails, then re-add the bdi to
+		 * the pending list and force writeout of the bdi
+		 * from this forker thread. That will free some memory
+		 * and we can try again.
+		 */
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+
+			/*
+			 * Add this 'bdi' to the back, so we get
+			 * a chance to flush other bdi's to free
+			 * memory.
+			 */
+			spin_lock(&bdi_lock);
+			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+			spin_unlock(&bdi_lock);
+
+			bdi_flush_io(bdi);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add the default flusher task that gets created for any bdi
+ * that has dirty data pending writeout
+ */
+static void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
+{
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * Check with the helper whether to proceed adding a task. Will only
+	 * abort if two or more simultaneous calls to
+	 * bdi_add_default_flusher_task() occurred; further additions will
+	 * block waiting for previous additions to finish.
+	 */
+	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
+		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+
+		/*
+		 * We are now on the pending list, wake up bdi_forker_task()
+		 * to finish the job and add us back to the active bdi_list
+		 */
+		wake_up_process(default_backing_dev_info.wb.task);
+	}
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		  const char *fmt, ...)
 {
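
Several functions in this hunk cooperate through the BDI_pending bit: bdi_add_default_flusher_task() claims it with test_and_set_bit() so each bdi is queued at most once, bdi_start_fn() releases it with clear_bit() followed by a barrier and wake_up_bit(), and bdi_wb_shutdown() in a later hunk sleeps in wait_on_bit() until any in-flight setup completes. A condensed sketch of that bit-based handshake, using the pre-3.17 wait_on_bit() signature that matches this patch; DEMO_BUSY and the demo_* helpers are hypothetical:

    #include <linux/bitops.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    #define DEMO_BUSY 0                     /* hypothetical state bit */
    static unsigned long demo_state;

    static int demo_sched_wait(void *word)  /* mirrors bdi_sched_wait() */
    {
            schedule();
            return 0;
    }

    /* Claim side: only the first of several racing callers proceeds. */
    static int demo_try_claim(void)
    {
            return !test_and_set_bit(DEMO_BUSY, &demo_state);
    }

    /* Owner side: publish completion, then wake any bit waiters. */
    static void demo_release(void)
    {
            clear_bit(DEMO_BUSY, &demo_state);
            smp_mb__after_clear_bit();      /* order the clear vs. the wakeup */
            wake_up_bit(&demo_state, DEMO_BUSY);
    }

    /* Teardown side: block until the owner has released the bit. */
    static void demo_wait_idle(void)
    {
            wait_on_bit(&demo_state, DEMO_BUSY, demo_sched_wait,
                        TASK_UNINTERRUPTIBLE);
    }

The barrier after clear_bit() matters: wake_up_bit() checks a hashed waitqueue, and without it the releasing side could observe an empty queue while a waiter still observes the bit set and sleeps forever.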
@@ -213,13 +469,34 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		goto exit;
 	}
 
-	mutex_lock(&bdi_lock);
+	spin_lock(&bdi_lock);
 	list_add_tail(&bdi->bdi_list, &bdi_list);
-	mutex_unlock(&bdi_lock);
+	spin_unlock(&bdi_lock);
 
 	bdi->dev = dev;
-	bdi_debug_register(bdi, dev_name(dev));
 
+	/*
+	 * Just start the forker thread for our default backing_dev_info,
+	 * and add other bdi's to the list. They will get a thread created
+	 * on-demand when they need it.
+	 */
+	if (bdi_cap_flush_forker(bdi)) {
+		struct bdi_writeback *wb = &bdi->wb;
+
+		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
+					dev_name(dev));
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+			ret = -ENOMEM;
+
+			spin_lock(&bdi_lock);
+			list_del(&bdi->bdi_list);
+			spin_unlock(&bdi_lock);
+			goto exit;
+		}
+	}
+
+	bdi_debug_register(bdi, dev_name(dev));
 exit:
 	return ret;
 }
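
One detail in the error path above: kthread_run() never returns NULL on failure, it returns an errno encoded into the pointer itself, so the caller must test IS_ERR() and then deliberately reset wb->task to NULL so that later `if (bdi->wb.task)` checks behave. A small sketch of that convention; demo_start() is a hypothetical wrapper:

    #include <linux/err.h>
    #include <linux/kernel.h>
    #include <linux/kthread.h>

    static struct task_struct *demo_start(int (*fn)(void *), void *arg)
    {
            struct task_struct *tsk;

            tsk = kthread_run(fn, arg, "demo-worker");
            if (IS_ERR(tsk)) {
                    /* Failure is e.g. ERR_PTR(-ENOMEM), never NULL. */
                    printk(KERN_WARNING "demo: kthread_run failed: %ld\n",
                           PTR_ERR(tsk));
                    return NULL;    /* normalize for NULL-checking callers */
            }
            return tsk;
    }

In bdi_register() above the actual error is collapsed to -ENOMEM; returning PTR_ERR(wb->task) would have preserved the original code.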
@@ -231,17 +508,42 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
 }
 EXPORT_SYMBOL(bdi_register_dev);
 
-static void bdi_remove_from_list(struct backing_dev_info *bdi)
+/*
+ * Remove bdi from the global list and shut down any threads we have running
+ */
+static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	mutex_lock(&bdi_lock);
+	struct bdi_writeback *wb;
+
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * If setup is pending, wait for that to complete first
+	 */
+	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+			TASK_UNINTERRUPTIBLE);
+
+	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
+	spin_lock(&bdi_lock);
 	list_del(&bdi->bdi_list);
-	mutex_unlock(&bdi_lock);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * Finally, kill the kernel threads. We don't need to be RCU
+	 * safe anymore, since the bdi is gone from visibility.
+	 */
+	list_for_each_entry(wb, &bdi->wb_list, list)
+		kthread_stop(wb->task);
 }
 
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
-		bdi_remove_from_list(bdi);
+		if (!bdi_cap_flush_forker(bdi))
+			bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
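
bdi->wb_list is written with list_add_tail_rcu()/list_del_rcu() under wb_lock so that lockless readers can traverse it; the plain list_for_each_entry() in bdi_wb_shutdown() above is safe only because the bdi has already been unhooked from bdi_list and nothing can find it anymore. A generic sketch of the writer/reader split for such an RCU-protected list; the demo_* types and names are hypothetical:

    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct demo_node {                      /* hypothetical element type */
            struct list_head list;
            int value;
    };

    static LIST_HEAD(demo_list);
    static DEFINE_SPINLOCK(demo_lock);      /* serializes writers only */

    static void demo_add(struct demo_node *n)
    {
            spin_lock(&demo_lock);
            list_add_tail_rcu(&n->list, &demo_list);
            spin_unlock(&demo_lock);
    }

    static int demo_sum(void)
    {
            struct demo_node *n;
            int sum = 0;

            rcu_read_lock();                /* readers take no lock */
            list_for_each_entry_rcu(n, &demo_list, list)
                    sum += n->value;
            rcu_read_unlock();
            return sum;
    }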
@@ -251,18 +553,25 @@ EXPORT_SYMBOL(bdi_unregister);
 
 int bdi_init(struct backing_dev_info *bdi)
 {
-	int i;
-	int err;
+	int i, err;
 
 	bdi->dev = NULL;
 
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
+	spin_lock_init(&bdi->wb_lock);
 	INIT_LIST_HEAD(&bdi->bdi_list);
-	INIT_LIST_HEAD(&bdi->b_io);
-	INIT_LIST_HEAD(&bdi->b_dirty);
-	INIT_LIST_HEAD(&bdi->b_more_io);
+	INIT_LIST_HEAD(&bdi->wb_list);
+	INIT_LIST_HEAD(&bdi->work_list);
+
+	bdi_wb_init(&bdi->wb, bdi);
+
+	/*
+	 * Just one thread support for now, hard code mask and count
+	 */
+	bdi->wb_mask = 1;
+	bdi->wb_cnt = 1;
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -277,8 +586,6 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
-
-		bdi_remove_from_list(bdi);
 	}
 
 	return err;
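
The `while (i--)` in this error path is the usual partial-initialization unwind: if percpu_counter_init() fails at index i, counters 0..i-1 are the only ones that exist, and the loop destroys exactly those, newest first. The same shape in isolation, with a hypothetical DEMO_NR:

    #include <linux/percpu_counter.h>

    #define DEMO_NR 4                       /* hypothetical counter count */
    static struct percpu_counter demo_stat[DEMO_NR];

    static int demo_counters_init(void)
    {
            int i, err;

            for (i = 0; i < DEMO_NR; i++) {
                    err = percpu_counter_init(&demo_stat[i], 0);
                    if (err)
                            goto err;       /* demo_stat[i] was not created */
            }
            return 0;
    err:
            while (i--)                     /* tear down 0..i-1 in reverse */
                    percpu_counter_destroy(&demo_stat[i]);
            return err;
    }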
@@ -289,9 +596,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
-	WARN_ON(!list_empty(&bdi->b_dirty));
-	WARN_ON(!list_empty(&bdi->b_io));
-	WARN_ON(!list_empty(&bdi->b_more_io));
+	WARN_ON(bdi_has_dirty_io(bdi));
 
 	bdi_unregister(bdi);
 