author		Jens Axboe <jens.axboe@oracle.com>	2009-09-02 03:19:46 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-09-11 03:20:25 -0400
commit		66f3b8e2e103a0b93b945764d98e9ba46cb926dd (patch)
tree		442bf5664214f0a1448e4010b09868cc58fdd3d1
parent		d8a8559cd7a9ccac98d5f6f13297a2ff68a43627 (diff)
writeback: move dirty inodes from super_block to backing_dev_info
This is a first step at introducing per-bdi flusher threads. We should
have no change in behaviour, although sb_has_dirty_inodes() is now
ridiculously expensive, as there's no easy way to answer that question.
Not a huge problem, since it'll be deleted in subsequent patches.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
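In isolation, the structural change is easy to state: the dirty/io/more_io lists move from struct super_block onto struct backing_dev_info, so a dirtied inode is parked on its backing device rather than on its filesystem, and any per-superblock question now has to visit every bdi. The stand-alone C sketch below (user-space stand-ins for the kernel types and list helpers; all names are illustrative, not code from this patch) models that move and shows why sb_has_dirty_inodes() degrades from three list_empty() checks to a full scan of every inode on every bdi.

/*
 * Simplified user-space model of the data-structure move in this patch.
 * Stand-in types, not the kernel's real list API.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static void init_list(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct backing_dev_info {
	struct list_head b_dirty;	/* dirty inodes */
	struct list_head b_io;		/* parked for writeback */
	struct list_head b_more_io;	/* parked for more writeback */
};

struct super_block { const char *name; };

struct inode {
	struct super_block *i_sb;
	struct backing_dev_info *bdi;	/* i_mapping->backing_dev_info */
	struct list_head i_list;	/* now threads onto bdi lists */
};

/* as in __mark_inode_dirty(): the inode is parked on its bdi, not its sb */
static void mark_inode_dirty(struct inode *inode)
{
	list_add_tail(&inode->i_list, &inode->bdi->b_dirty);
}

/* scan one bdi list for any inode belonging to sb (cf. sb_on_inode_list) */
static int sb_on_list(struct super_block *sb, struct list_head *list)
{
	struct list_head *p;

	for (p = list->next; p != list; p = p->next)
		if (container_of(p, struct inode, i_list)->i_sb == sb)
			return 1;
	return 0;
}

/* cf. the new sb_has_dirty_inodes(): every bdi, every list, every inode */
static int sb_has_dirty_inodes(struct super_block *sb,
			       struct backing_dev_info **bdis, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		if (sb_on_list(sb, &bdis[i]->b_dirty) ||
		    sb_on_list(sb, &bdis[i]->b_io) ||
		    sb_on_list(sb, &bdis[i]->b_more_io))
			return 1;
	return 0;
}

int main(void)
{
	struct backing_dev_info bdi;
	struct super_block sb = { "ext3" };
	struct inode inode = { &sb, &bdi, { NULL, NULL } };
	struct backing_dev_info *all_bdis[] = { &bdi };

	init_list(&bdi.b_dirty);
	init_list(&bdi.b_io);
	init_list(&bdi.b_more_io);

	mark_inode_dirty(&inode);
	printf("%s has dirty inodes: %d\n", sb.name,
	       sb_has_dirty_inodes(&sb, all_bdis, 1));
	return 0;
}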
-rw-r--r--	fs/fs-writeback.c		197
-rw-r--r--	fs/super.c			  3
-rw-r--r--	include/linux/backing-dev.h	  9
-rw-r--r--	include/linux/fs.h		  5
-rw-r--r--	mm/backing-dev.c		 24
-rw-r--r--	mm/page-writeback.c		 11
6 files changed, 165 insertions(+), 84 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 271e5f44e871..45ad4bb700e6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,7 @@
 #include <linux/buffer_head.h>
 #include "internal.h"
 
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
 
 /**
  * writeback_acquire - attempt to get exclusive writeback access to a device
@@ -165,12 +166,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			goto out;
 
 		/*
-		 * If the inode was already on s_dirty/s_io/s_more_io, don't
-		 * reposition it (that would break s_dirty time-ordering).
+		 * If the inode was already on b_dirty/b_io/b_more_io, don't
+		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &sb->s_dirty);
+			list_move(&inode->i_list,
+					&inode_to_bdi(inode)->b_dirty);
 		}
 	}
 out:
@@ -191,31 +193,30 @@ static int write_inode(struct inode *inode, int sync)
  * furthest end of its superblock's dirty-inode list.
  *
  * Before stamping the inode's ->dirtied_when, we check to see whether it is
- * already the most-recently-dirtied inode on the s_dirty list. If that is
+ * already the most-recently-dirtied inode on the b_dirty list. If that is
  * the case then the inode must have been redirtied while it was being written
  * out and we don't reset its dirtied_when.
  */
 static void redirty_tail(struct inode *inode)
 {
-	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = inode_to_bdi(inode);
 
-	if (!list_empty(&sb->s_dirty)) {
-		struct inode *tail_inode;
+	if (!list_empty(&bdi->b_dirty)) {
+		struct inode *tail;
 
-		tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
-		if (time_before(inode->dirtied_when,
-				tail_inode->dirtied_when))
+		tail = list_entry(bdi->b_dirty.next, struct inode, i_list);
+		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	list_move(&inode->i_list, &sb->s_dirty);
+	list_move(&inode->i_list, &bdi->b_dirty);
 }
 
 /*
- * requeue inode for re-scanning after sb->s_io list is exhausted.
+ * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
 static void requeue_io(struct inode *inode)
 {
-	list_move(&inode->i_list, &inode->i_sb->s_more_io);
+	list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -262,18 +263,50 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 /*
  * Queue all expired dirty inodes for io, eldest first.
 */
-static void queue_io(struct super_block *sb,
-		     unsigned long *older_than_this)
+static void queue_io(struct backing_dev_info *bdi,
+		     unsigned long *older_than_this)
+{
+	list_splice_init(&bdi->b_more_io, bdi->b_io.prev);
+	move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this);
+}
+
+static int sb_on_inode_list(struct super_block *sb, struct list_head *list)
 {
-	list_splice_init(&sb->s_more_io, sb->s_io.prev);
-	move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
+	struct inode *inode;
+	int ret = 0;
+
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, list, i_list) {
+		if (inode->i_sb == sb) {
+			ret = 1;
+			break;
+		}
+	}
+	spin_unlock(&inode_lock);
+	return ret;
 }
 
 int sb_has_dirty_inodes(struct super_block *sb)
 {
-	return !list_empty(&sb->s_dirty) ||
-	       !list_empty(&sb->s_io) ||
-	       !list_empty(&sb->s_more_io);
+	struct backing_dev_info *bdi;
+	int ret = 0;
+
+	/*
+	 * This is REALLY expensive right now, but it'll go away
+	 * when the bdi writeback is introduced
+	 */
+	mutex_lock(&bdi_lock);
+	list_for_each_entry(bdi, &bdi_list, bdi_list) {
+		if (sb_on_inode_list(sb, &bdi->b_dirty) ||
+		    sb_on_inode_list(sb, &bdi->b_io) ||
+		    sb_on_inode_list(sb, &bdi->b_more_io)) {
+			ret = 1;
+			break;
+		}
+	}
+	mutex_unlock(&bdi_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
@@ -322,11 +355,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	if (inode->i_state & I_SYNC) {
 		/*
 		 * If this inode is locked for writeback and we are not doing
-		 * writeback-for-data-integrity, move it to s_more_io so that
+		 * writeback-for-data-integrity, move it to b_more_io so that
 		 * writeback can proceed with the other inodes on s_io.
 		 *
 		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
+		 * completed a full scan of b_io.
 		 */
 		if (!wait) {
 			requeue_io(inode);
@@ -371,11 +404,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	/*
 	 * We didn't write back all the pages. nfs_writepages()
 	 * sometimes bales out without doing anything. Redirty
-	 * the inode; Move it from s_io onto s_more_io/s_dirty.
+	 * the inode; Move it from b_io onto b_more_io/b_dirty.
 	 */
 	/*
 	 * akpm: if the caller was the kupdate function we put
-	 * this inode at the head of s_dirty so it gets first
+	 * this inode at the head of b_dirty so it gets first
 	 * consideration. Otherwise, move it to the tail, for
 	 * the reasons described there. I'm not really sure
 	 * how much sense this makes. Presumably I had a good
@@ -385,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		if (wbc->for_kupdate) {
 			/*
 			 * For the kupdate function we move the inode
-			 * to s_more_io so it will get more writeout as
+			 * to b_more_io so it will get more writeout as
 			 * soon as the queue becomes uncongested.
 			 */
 			inode->i_state |= I_DIRTY_PAGES;
@@ -433,51 +466,34 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-/*
- * Write out a superblock's list of dirty inodes. A wait will be performed
- * upon no inodes, all inodes or the final one, depending upon sync_mode.
- *
- * If older_than_this is non-NULL, then only write out inodes which
- * had their first dirtying at a time earlier than *older_than_this.
- *
- * If we're a pdflush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched. For other superblocks,
- * assume that all inodes are backed by the same queue.
- *
- * FIXME: this linear search could get expensive with many fileystems. But
- * how to fix? We need to go from an address_space to all inodes which share
- * a queue with that address_space. (Easy: have a global "dirty superblocks"
- * list).
- *
- * The inodes to be written are parked on sb->s_io. They are moved back onto
- * sb->s_dirty as they are selected for writing. This way, none can be missed
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on inode_sync_wait.
- */
-static void generic_sync_sb_inodes(struct super_block *sb,
-				   struct writeback_control *wbc)
+static void generic_sync_bdi_inodes(struct backing_dev_info *bdi,
+				    struct writeback_control *wbc,
+				    struct super_block *sb)
 {
+	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
 	const unsigned long start = jiffies;	/* livelock avoidance */
-	int sync = wbc->sync_mode == WB_SYNC_ALL;
 
 	spin_lock(&inode_lock);
-	if (!wbc->for_kupdate || list_empty(&sb->s_io))
-		queue_io(sb, wbc->older_than_this);
 
-	while (!list_empty(&sb->s_io)) {
-		struct inode *inode = list_entry(sb->s_io.prev,
+	if (!wbc->for_kupdate || list_empty(&bdi->b_io))
+		queue_io(bdi, wbc->older_than_this);
+
+	while (!list_empty(&bdi->b_io)) {
+		struct inode *inode = list_entry(bdi->b_io.prev,
 						struct inode, i_list);
-		struct address_space *mapping = inode->i_mapping;
-		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		long pages_skipped;
 
+		/*
+		 * super block given and doesn't match, skip this inode
+		 */
+		if (sb && sb != inode->i_sb) {
+			redirty_tail(inode);
+			continue;
+		}
+
 		if (!bdi_cap_writeback_dirty(bdi)) {
 			redirty_tail(inode);
-			if (sb_is_blkdev_sb(sb)) {
+			if (is_blkdev_sb) {
 				/*
 				 * Dirty memory-backed blockdev: the ramdisk
 				 * driver does this. Skip just this inode
@@ -499,14 +515,14 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
-			if (!sb_is_blkdev_sb(sb))
+			if (!is_blkdev_sb)
 				break;		/* Skip a congested fs */
 			requeue_io(inode);
 			continue;		/* Skip a congested blockdev */
 		}
 
 		if (wbc->bdi && bdi != wbc->bdi) {
-			if (!sb_is_blkdev_sb(sb))
+			if (!is_blkdev_sb)
 				break;		/* fs has the wrong queue */
 			requeue_io(inode);
 			continue;	/* blockdev has wrong queue */
@@ -544,13 +560,57 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 			wbc->more_io = 1;
 			break;
 		}
-		if (!list_empty(&sb->s_more_io))
+		if (!list_empty(&bdi->b_more_io))
 			wbc->more_io = 1;
 	}
 
-	if (sync) {
+	spin_unlock(&inode_lock);
+	/* Leave any unwritten inodes on b_io */
+}
+
+/*
+ * Write out a superblock's list of dirty inodes. A wait will be performed
+ * upon no inodes, all inodes or the final one, depending upon sync_mode.
+ *
+ * If older_than_this is non-NULL, then only write out inodes which
+ * had their first dirtying at a time earlier than *older_than_this.
+ *
+ * If we're a pdflush thread, then implement pdflush collision avoidance
+ * against the entire list.
+ *
+ * If `bdi' is non-zero then we're being asked to writeback a specific queue.
+ * This function assumes that the blockdev superblock's inodes are backed by
+ * a variety of queues, so all inodes are searched. For other superblocks,
+ * assume that all inodes are backed by the same queue.
+ *
+ * FIXME: this linear search could get expensive with many filesystems. But
+ * how to fix? We need to go from an address_space to all inodes which share
+ * a queue with that address_space. (Easy: have a global "dirty superblocks"
+ * list).
+ *
+ * The inodes to be written are parked on bdi->b_io. They are moved back onto
+ * bdi->b_dirty as they are selected for writing. This way, none can be missed
+ * on the writer throttling path, and we get decent balancing between many
+ * throttled threads: we don't want them all piling up on inode_sync_wait.
+ */
+static void generic_sync_sb_inodes(struct super_block *sb,
+				   struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi;
+
+	if (!wbc->bdi) {
+		mutex_lock(&bdi_lock);
+		list_for_each_entry(bdi, &bdi_list, bdi_list)
+			generic_sync_bdi_inodes(bdi, wbc, sb);
+		mutex_unlock(&bdi_lock);
+	} else
+		generic_sync_bdi_inodes(wbc->bdi, wbc, sb);
+
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		struct inode *inode, *old_inode = NULL;
 
+		spin_lock(&inode_lock);
+
 		/*
 		 * Data integrity sync. Must wait for all pages under writeback,
 		 * because there may have been pages dirtied before our sync
@@ -588,10 +648,7 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 		}
 		spin_unlock(&inode_lock);
 		iput(old_inode);
-	} else
-		spin_unlock(&inode_lock);
-
-	return;		/* Leave any unwritten inodes on s_io */
+	}
 }
 
 /*
@@ -599,8 +656,8 @@ static void generic_sync_sb_inodes(struct super_block *sb,
  *
  * Note:
  * We don't need to grab a reference to superblock here. If it has non-empty
- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
+ * ->b_dirty it hadn't been killed yet and kill_super() won't proceed
+ * past sync_inodes_sb() until the ->b_dirty/b_io/b_more_io lists are all
 * empty. Since __sync_single_inode() regains inode_lock before it finally moves
 * inode from superblock lists we are OK.
 *
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..0d22ce3be4aa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 			s = NULL;
 			goto out;
 		}
-		INIT_LIST_HEAD(&s->s_dirty);
-		INIT_LIST_HEAD(&s->s_io);
-		INIT_LIST_HEAD(&s->s_more_io);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1d52425a6118..928cd5484f4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -40,6 +40,8 @@ enum bdi_stat_item {
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
 struct backing_dev_info {
+	struct list_head bdi_list;
+
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -58,6 +60,10 @@ struct backing_dev_info {
 
 	struct device *dev;
 
+	struct list_head b_dirty;	/* dirty inodes */
+	struct list_head b_io;		/* parked for writeback */
+	struct list_head b_more_io;	/* parked for more writeback */
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
@@ -72,6 +78,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 
+extern struct mutex bdi_lock;
+extern struct list_head bdi_list;
+
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
 		enum bdi_stat_item item, s64 amount)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 46ff7dd6e164..56371be1be65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -715,7 +715,7 @@ struct posix_acl;
 
 struct inode {
 	struct hlist_node	i_hash;
-	struct list_head	i_list;
+	struct list_head	i_list;		/* backing dev IO list */
 	struct list_head	i_sb_list;
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
@@ -1336,9 +1336,6 @@ struct super_block {
 	struct xattr_handler	**s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
-	struct list_head	s_dirty;	/* dirty inodes */
-	struct list_head	s_io;		/* parked for writeback */
-	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c86edd244294..6f163e0f0509 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -22,6 +22,8 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+DEFINE_MUTEX(bdi_lock);
+LIST_HEAD(bdi_list);
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -211,6 +213,10 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		goto exit;
 	}
 
+	mutex_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_list);
+	mutex_unlock(&bdi_lock);
+
 	bdi->dev = dev;
 	bdi_debug_register(bdi, dev_name(dev));
 
@@ -225,9 +231,17 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
 }
 EXPORT_SYMBOL(bdi_register_dev);
 
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+	mutex_lock(&bdi_lock);
+	list_del(&bdi->bdi_list);
+	mutex_unlock(&bdi_lock);
+}
+
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		bdi_remove_from_list(bdi);
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -245,6 +259,10 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
+	INIT_LIST_HEAD(&bdi->bdi_list);
+	INIT_LIST_HEAD(&bdi->b_io);
+	INIT_LIST_HEAD(&bdi->b_dirty);
+	INIT_LIST_HEAD(&bdi->b_more_io);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -259,6 +277,8 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+
+		bdi_remove_from_list(bdi);
 	}
 
 	return err;
@@ -269,6 +289,10 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
+	WARN_ON(!list_empty(&bdi->b_dirty));
+	WARN_ON(!list_empty(&bdi->b_io));
+	WARN_ON(!list_empty(&bdi->b_more_io));
+
 	bdi_unregister(bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627ebcd313..f8341b6019bf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -320,15 +320,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
 *
 */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	mutex_lock(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -340,27 +338,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	mutex_unlock(&bdi_lock);
 
 	return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	mutex_lock(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	mutex_unlock(&bdi_lock);
 
 	return ret;
 }
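A design note on the locking above: bdi_lock had been a file-local spinlock in mm/page-writeback.c guarding only the min/max ratio updates; this patch promotes it to a global mutex (declared in backing-dev.h, defined in mm/backing-dev.c) because walkers of bdi_list such as generic_sync_sb_inodes() now hold it across inode writeback, which can sleep. A minimal sketch of that registry pattern follows, with pthreads standing in for the kernel mutex API (hypothetical user-space names, not code from this patch):

/*
 * Registration and unregistration take bdi_lock, and any walker of the
 * global list must hold the same mutex; a sleepable lock is required
 * because a walker may block while flushing inodes.
 */
#include <pthread.h>

struct list_head { struct list_head *next, *prev; };

static pthread_mutex_t bdi_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head bdi_list = { &bdi_list, &bdi_list };

struct backing_dev_info { struct list_head bdi_list; };

/* cf. bdi_register(): add to the tail of the global list under bdi_lock */
static void bdi_add(struct backing_dev_info *bdi)
{
	pthread_mutex_lock(&bdi_lock);
	bdi->bdi_list.prev = bdi_list.prev;
	bdi->bdi_list.next = &bdi_list;
	bdi_list.prev->next = &bdi->bdi_list;
	bdi_list.prev = &bdi->bdi_list;
	pthread_mutex_unlock(&bdi_lock);
}

/* cf. bdi_remove_from_list(): unlink under the same mutex */
static void bdi_remove(struct backing_dev_info *bdi)
{
	pthread_mutex_lock(&bdi_lock);
	bdi->bdi_list.prev->next = bdi->bdi_list.next;
	bdi->bdi_list.next->prev = bdi->bdi_list.prev;
	pthread_mutex_unlock(&bdi_lock);
}

int main(void)
{
	struct backing_dev_info bdi;

	bdi_add(&bdi);
	bdi_remove(&bdi);
	return 0;
}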