diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2009-09-02 03:19:46 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-09-11 03:20:25 -0400 |
commit | 66f3b8e2e103a0b93b945764d98e9ba46cb926dd (patch) | |
tree | 442bf5664214f0a1448e4010b09868cc58fdd3d1 | |
parent | d8a8559cd7a9ccac98d5f6f13297a2ff68a43627 (diff) |
writeback: move dirty inodes from super_block to backing_dev_info
This is a first step at introducing per-bdi flusher threads. We should
have no change in behaviour, although sb_has_dirty_inodes() is now
ridiculously expensive, as there's no easy way to answer that question.
Not a huge problem, since it'll be deleted in subsequent patches.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/fs-writeback.c | 197 | ||||
-rw-r--r-- | fs/super.c | 3 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 9 | ||||
-rw-r--r-- | include/linux/fs.h | 5 | ||||
-rw-r--r-- | mm/backing-dev.c | 24 | ||||
-rw-r--r-- | mm/page-writeback.c | 11 |
6 files changed, 165 insertions, 84 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 271e5f44e871..45ad4bb700e6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
26 | #include "internal.h" | 26 | #include "internal.h" |
27 | 27 | ||
28 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
28 | 29 | ||
29 | /** | 30 | /** |
30 | * writeback_acquire - attempt to get exclusive writeback access to a device | 31 | * writeback_acquire - attempt to get exclusive writeback access to a device |
@@ -165,12 +166,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
165 | goto out; | 166 | goto out; |
166 | 167 | ||
167 | /* | 168 | /* |
168 | * If the inode was already on s_dirty/s_io/s_more_io, don't | 169 | * If the inode was already on b_dirty/b_io/b_more_io, don't |
169 | * reposition it (that would break s_dirty time-ordering). | 170 | * reposition it (that would break b_dirty time-ordering). |
170 | */ | 171 | */ |
171 | if (!was_dirty) { | 172 | if (!was_dirty) { |
172 | inode->dirtied_when = jiffies; | 173 | inode->dirtied_when = jiffies; |
173 | list_move(&inode->i_list, &sb->s_dirty); | 174 | list_move(&inode->i_list, |
175 | &inode_to_bdi(inode)->b_dirty); | ||
174 | } | 176 | } |
175 | } | 177 | } |
176 | out: | 178 | out: |
@@ -191,31 +193,30 @@ static int write_inode(struct inode *inode, int sync) | |||
191 | * furthest end of its superblock's dirty-inode list. | 193 | * furthest end of its superblock's dirty-inode list. |
192 | * | 194 | * |
193 | * Before stamping the inode's ->dirtied_when, we check to see whether it is | 195 | * Before stamping the inode's ->dirtied_when, we check to see whether it is |
194 | * already the most-recently-dirtied inode on the s_dirty list. If that is | 196 | * already the most-recently-dirtied inode on the b_dirty list. If that is |
195 | * the case then the inode must have been redirtied while it was being written | 197 | * the case then the inode must have been redirtied while it was being written |
196 | * out and we don't reset its dirtied_when. | 198 | * out and we don't reset its dirtied_when. |
197 | */ | 199 | */ |
198 | static void redirty_tail(struct inode *inode) | 200 | static void redirty_tail(struct inode *inode) |
199 | { | 201 | { |
200 | struct super_block *sb = inode->i_sb; | 202 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
201 | 203 | ||
202 | if (!list_empty(&sb->s_dirty)) { | 204 | if (!list_empty(&bdi->b_dirty)) { |
203 | struct inode *tail_inode; | 205 | struct inode *tail; |
204 | 206 | ||
205 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); | 207 | tail = list_entry(bdi->b_dirty.next, struct inode, i_list); |
206 | if (time_before(inode->dirtied_when, | 208 | if (time_before(inode->dirtied_when, tail->dirtied_when)) |
207 | tail_inode->dirtied_when)) | ||
208 | inode->dirtied_when = jiffies; | 209 | inode->dirtied_when = jiffies; |
209 | } | 210 | } |
210 | list_move(&inode->i_list, &sb->s_dirty); | 211 | list_move(&inode->i_list, &bdi->b_dirty); |
211 | } | 212 | } |
212 | 213 | ||
213 | /* | 214 | /* |
214 | * requeue inode for re-scanning after sb->s_io list is exhausted. | 215 | * requeue inode for re-scanning after bdi->b_io list is exhausted. |
215 | */ | 216 | */ |
216 | static void requeue_io(struct inode *inode) | 217 | static void requeue_io(struct inode *inode) |
217 | { | 218 | { |
218 | list_move(&inode->i_list, &inode->i_sb->s_more_io); | 219 | list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io); |
219 | } | 220 | } |
220 | 221 | ||
221 | static void inode_sync_complete(struct inode *inode) | 222 | static void inode_sync_complete(struct inode *inode) |
@@ -262,18 +263,50 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
262 | /* | 263 | /* |
263 | * Queue all expired dirty inodes for io, eldest first. | 264 | * Queue all expired dirty inodes for io, eldest first. |
264 | */ | 265 | */ |
265 | static void queue_io(struct super_block *sb, | 266 | static void queue_io(struct backing_dev_info *bdi, |
266 | unsigned long *older_than_this) | 267 | unsigned long *older_than_this) |
268 | { | ||
269 | list_splice_init(&bdi->b_more_io, bdi->b_io.prev); | ||
270 | move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this); | ||
271 | } | ||
272 | |||
273 | static int sb_on_inode_list(struct super_block *sb, struct list_head *list) | ||
267 | { | 274 | { |
268 | list_splice_init(&sb->s_more_io, sb->s_io.prev); | 275 | struct inode *inode; |
269 | move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); | 276 | int ret = 0; |
277 | |||
278 | spin_lock(&inode_lock); | ||
279 | list_for_each_entry(inode, list, i_list) { | ||
280 | if (inode->i_sb == sb) { | ||
281 | ret = 1; | ||
282 | break; | ||
283 | } | ||
284 | } | ||
285 | spin_unlock(&inode_lock); | ||
286 | return ret; | ||
270 | } | 287 | } |
271 | 288 | ||
272 | int sb_has_dirty_inodes(struct super_block *sb) | 289 | int sb_has_dirty_inodes(struct super_block *sb) |
273 | { | 290 | { |
274 | return !list_empty(&sb->s_dirty) || | 291 | struct backing_dev_info *bdi; |
275 | !list_empty(&sb->s_io) || | 292 | int ret = 0; |
276 | !list_empty(&sb->s_more_io); | 293 | |
294 | /* | ||
295 | * This is REALLY expensive right now, but it'll go away | ||
296 | * when the bdi writeback is introduced | ||
297 | */ | ||
298 | mutex_lock(&bdi_lock); | ||
299 | list_for_each_entry(bdi, &bdi_list, bdi_list) { | ||
300 | if (sb_on_inode_list(sb, &bdi->b_dirty) || | ||
301 | sb_on_inode_list(sb, &bdi->b_io) || | ||
302 | sb_on_inode_list(sb, &bdi->b_more_io)) { | ||
303 | ret = 1; | ||
304 | break; | ||
305 | } | ||
306 | } | ||
307 | mutex_unlock(&bdi_lock); | ||
308 | |||
309 | return ret; | ||
277 | } | 310 | } |
278 | EXPORT_SYMBOL(sb_has_dirty_inodes); | 311 | EXPORT_SYMBOL(sb_has_dirty_inodes); |
279 | 312 | ||
@@ -322,11 +355,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
322 | if (inode->i_state & I_SYNC) { | 355 | if (inode->i_state & I_SYNC) { |
323 | /* | 356 | /* |
324 | * If this inode is locked for writeback and we are not doing | 357 | * If this inode is locked for writeback and we are not doing |
325 | * writeback-for-data-integrity, move it to s_more_io so that | 358 | * writeback-for-data-integrity, move it to b_more_io so that |
326 | * writeback can proceed with the other inodes on s_io. | 359 | * writeback can proceed with the other inodes on b_io. |
327 | * | 360 | * |
328 | * We'll have another go at writing back this inode when we | 361 | * We'll have another go at writing back this inode when we |
329 | * completed a full scan of s_io. | 362 | * completed a full scan of b_io. |
330 | */ | 363 | */ |
331 | if (!wait) { | 364 | if (!wait) { |
332 | requeue_io(inode); | 365 | requeue_io(inode); |
@@ -371,11 +404,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
371 | /* | 404 | /* |
372 | * We didn't write back all the pages. nfs_writepages() | 405 | * We didn't write back all the pages. nfs_writepages() |
373 | * sometimes bales out without doing anything. Redirty | 406 | * sometimes bales out without doing anything. Redirty |
374 | * the inode; Move it from s_io onto s_more_io/s_dirty. | 407 | * the inode; Move it from b_io onto b_more_io/b_dirty. |
375 | */ | 408 | */ |
376 | /* | 409 | /* |
377 | * akpm: if the caller was the kupdate function we put | 410 | * akpm: if the caller was the kupdate function we put |
378 | * this inode at the head of s_dirty so it gets first | 411 | * this inode at the head of b_dirty so it gets first |
379 | * consideration. Otherwise, move it to the tail, for | 412 | * consideration. Otherwise, move it to the tail, for |
380 | * the reasons described there. I'm not really sure | 413 | * the reasons described there. I'm not really sure |
381 | * how much sense this makes. Presumably I had a good | 414 | * how much sense this makes. Presumably I had a good |
@@ -385,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
385 | if (wbc->for_kupdate) { | 418 | if (wbc->for_kupdate) { |
386 | /* | 419 | /* |
387 | * For the kupdate function we move the inode | 420 | * For the kupdate function we move the inode |
388 | * to s_more_io so it will get more writeout as | 421 | * to b_more_io so it will get more writeout as |
389 | * soon as the queue becomes uncongested. | 422 | * soon as the queue becomes uncongested. |
390 | */ | 423 | */ |
391 | inode->i_state |= I_DIRTY_PAGES; | 424 | inode->i_state |= I_DIRTY_PAGES; |
@@ -433,51 +466,34 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
433 | return ret; | 466 | return ret; |
434 | } | 467 | } |
435 | 468 | ||
436 | /* | 469 | static void generic_sync_bdi_inodes(struct backing_dev_info *bdi, |
437 | * Write out a superblock's list of dirty inodes. A wait will be performed | 470 | struct writeback_control *wbc, |
438 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | 471 | struct super_block *sb) |
439 | * | ||
440 | * If older_than_this is non-NULL, then only write out inodes which | ||
441 | * had their first dirtying at a time earlier than *older_than_this. | ||
442 | * | ||
443 | * If we're a pdflush thread, then implement pdflush collision avoidance | ||
444 | * against the entire list. | ||
445 | * | ||
446 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | ||
447 | * This function assumes that the blockdev superblock's inodes are backed by | ||
448 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
449 | * assume that all inodes are backed by the same queue. | ||
450 | * | ||
451 | * FIXME: this linear search could get expensive with many fileystems. But | ||
452 | * how to fix? We need to go from an address_space to all inodes which share | ||
453 | * a queue with that address_space. (Easy: have a global "dirty superblocks" | ||
454 | * list). | ||
455 | * | ||
456 | * The inodes to be written are parked on sb->s_io. They are moved back onto | ||
457 | * sb->s_dirty as they are selected for writing. This way, none can be missed | ||
458 | * on the writer throttling path, and we get decent balancing between many | ||
459 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
460 | */ | ||
461 | static void generic_sync_sb_inodes(struct super_block *sb, | ||
462 | struct writeback_control *wbc) | ||
463 | { | 472 | { |
473 | const int is_blkdev_sb = sb_is_blkdev_sb(sb); | ||
464 | const unsigned long start = jiffies; /* livelock avoidance */ | 474 | const unsigned long start = jiffies; /* livelock avoidance */ |
465 | int sync = wbc->sync_mode == WB_SYNC_ALL; | ||
466 | 475 | ||
467 | spin_lock(&inode_lock); | 476 | spin_lock(&inode_lock); |
468 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | ||
469 | queue_io(sb, wbc->older_than_this); | ||
470 | 477 | ||
471 | while (!list_empty(&sb->s_io)) { | 478 | if (!wbc->for_kupdate || list_empty(&bdi->b_io)) |
472 | struct inode *inode = list_entry(sb->s_io.prev, | 479 | queue_io(bdi, wbc->older_than_this); |
480 | |||
481 | while (!list_empty(&bdi->b_io)) { | ||
482 | struct inode *inode = list_entry(bdi->b_io.prev, | ||
473 | struct inode, i_list); | 483 | struct inode, i_list); |
474 | struct address_space *mapping = inode->i_mapping; | ||
475 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
476 | long pages_skipped; | 484 | long pages_skipped; |
477 | 485 | ||
486 | /* | ||
487 | * super block given and doesn't match, skip this inode | ||
488 | */ | ||
489 | if (sb && sb != inode->i_sb) { | ||
490 | redirty_tail(inode); | ||
491 | continue; | ||
492 | } | ||
493 | |||
478 | if (!bdi_cap_writeback_dirty(bdi)) { | 494 | if (!bdi_cap_writeback_dirty(bdi)) { |
479 | redirty_tail(inode); | 495 | redirty_tail(inode); |
480 | if (sb_is_blkdev_sb(sb)) { | 496 | if (is_blkdev_sb) { |
481 | /* | 497 | /* |
482 | * Dirty memory-backed blockdev: the ramdisk | 498 | * Dirty memory-backed blockdev: the ramdisk |
483 | * driver does this. Skip just this inode | 499 | * driver does this. Skip just this inode |
@@ -499,14 +515,14 @@ static void generic_sync_sb_inodes(struct super_block *sb, | |||
499 | 515 | ||
500 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 516 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
501 | wbc->encountered_congestion = 1; | 517 | wbc->encountered_congestion = 1; |
502 | if (!sb_is_blkdev_sb(sb)) | 518 | if (!is_blkdev_sb) |
503 | break; /* Skip a congested fs */ | 519 | break; /* Skip a congested fs */ |
504 | requeue_io(inode); | 520 | requeue_io(inode); |
505 | continue; /* Skip a congested blockdev */ | 521 | continue; /* Skip a congested blockdev */ |
506 | } | 522 | } |
507 | 523 | ||
508 | if (wbc->bdi && bdi != wbc->bdi) { | 524 | if (wbc->bdi && bdi != wbc->bdi) { |
509 | if (!sb_is_blkdev_sb(sb)) | 525 | if (!is_blkdev_sb) |
510 | break; /* fs has the wrong queue */ | 526 | break; /* fs has the wrong queue */ |
511 | requeue_io(inode); | 527 | requeue_io(inode); |
512 | continue; /* blockdev has wrong queue */ | 528 | continue; /* blockdev has wrong queue */ |
@@ -544,13 +560,57 @@ static void generic_sync_sb_inodes(struct super_block *sb, | |||
544 | wbc->more_io = 1; | 560 | wbc->more_io = 1; |
545 | break; | 561 | break; |
546 | } | 562 | } |
547 | if (!list_empty(&sb->s_more_io)) | 563 | if (!list_empty(&bdi->b_more_io)) |
548 | wbc->more_io = 1; | 564 | wbc->more_io = 1; |
549 | } | 565 | } |
550 | 566 | ||
551 | if (sync) { | 567 | spin_unlock(&inode_lock); |
568 | /* Leave any unwritten inodes on b_io */ | ||
569 | } | ||
570 | |||
571 | /* | ||
572 | * Write out a superblock's list of dirty inodes. A wait will be performed | ||
573 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | ||
574 | * | ||
575 | * If older_than_this is non-NULL, then only write out inodes which | ||
576 | * had their first dirtying at a time earlier than *older_than_this. | ||
577 | * | ||
578 | * If we're a pdflush thread, then implement pdflush collision avoidance | ||
579 | * against the entire list. | ||
580 | * | ||
581 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | ||
582 | * This function assumes that the blockdev superblock's inodes are backed by | ||
583 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
584 | * assume that all inodes are backed by the same queue. | ||
585 | * | ||
586 | * FIXME: this linear search could get expensive with many filesystems. But | ||
587 | * how to fix? We need to go from an address_space to all inodes which share | ||
588 | * a queue with that address_space. (Easy: have a global "dirty superblocks" | ||
589 | * list). | ||
590 | * | ||
591 | * The inodes to be written are parked on bdi->b_io. They are moved back onto | ||
592 | * bdi->b_dirty as they are selected for writing. This way, none can be missed | ||
593 | * on the writer throttling path, and we get decent balancing between many | ||
594 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
595 | */ | ||
596 | static void generic_sync_sb_inodes(struct super_block *sb, | ||
597 | struct writeback_control *wbc) | ||
598 | { | ||
599 | struct backing_dev_info *bdi; | ||
600 | |||
601 | if (!wbc->bdi) { | ||
602 | mutex_lock(&bdi_lock); | ||
603 | list_for_each_entry(bdi, &bdi_list, bdi_list) | ||
604 | generic_sync_bdi_inodes(bdi, wbc, sb); | ||
605 | mutex_unlock(&bdi_lock); | ||
606 | } else | ||
607 | generic_sync_bdi_inodes(wbc->bdi, wbc, sb); | ||
608 | |||
609 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
552 | struct inode *inode, *old_inode = NULL; | 610 | struct inode *inode, *old_inode = NULL; |
553 | 611 | ||
612 | spin_lock(&inode_lock); | ||
613 | |||
554 | /* | 614 | /* |
555 | * Data integrity sync. Must wait for all pages under writeback, | 615 | * Data integrity sync. Must wait for all pages under writeback, |
556 | * because there may have been pages dirtied before our sync | 616 | * because there may have been pages dirtied before our sync |
@@ -588,10 +648,7 @@ static void generic_sync_sb_inodes(struct super_block *sb, | |||
588 | } | 648 | } |
589 | spin_unlock(&inode_lock); | 649 | spin_unlock(&inode_lock); |
590 | iput(old_inode); | 650 | iput(old_inode); |
591 | } else | 651 | } |
592 | spin_unlock(&inode_lock); | ||
593 | |||
594 | return; /* Leave any unwritten inodes on s_io */ | ||
595 | } | 652 | } |
596 | 653 | ||
597 | /* | 654 | /* |
@@ -599,8 +656,8 @@ static void generic_sync_sb_inodes(struct super_block *sb, | |||
599 | * | 656 | * |
600 | * Note: | 657 | * Note: |
601 | * We don't need to grab a reference to superblock here. If it has non-empty | 658 | * We don't need to grab a reference to superblock here. If it has non-empty |
602 | * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed | 659 | * ->b_dirty it hasn't been killed yet and kill_super() won't proceed |
603 | * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all | 660 | * past sync_inodes_sb() until the ->b_dirty/b_io/b_more_io lists are all |
604 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves | 661 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves |
605 | * inode from superblock lists we are OK. | 662 | * inode from superblock lists we are OK. |
606 | * | 663 | * |
diff --git a/fs/super.c b/fs/super.c index 2761d3e22ed9..0d22ce3be4aa 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
62 | s = NULL; | 62 | s = NULL; |
63 | goto out; | 63 | goto out; |
64 | } | 64 | } |
65 | INIT_LIST_HEAD(&s->s_dirty); | ||
66 | INIT_LIST_HEAD(&s->s_io); | ||
67 | INIT_LIST_HEAD(&s->s_more_io); | ||
68 | INIT_LIST_HEAD(&s->s_files); | 65 | INIT_LIST_HEAD(&s->s_files); |
69 | INIT_LIST_HEAD(&s->s_instances); | 66 | INIT_LIST_HEAD(&s->s_instances); |
70 | INIT_HLIST_HEAD(&s->s_anon); | 67 | INIT_HLIST_HEAD(&s->s_anon); |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1d52425a6118..928cd5484f4d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -40,6 +40,8 @@ enum bdi_stat_item { | |||
40 | #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) | 40 | #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) |
41 | 41 | ||
42 | struct backing_dev_info { | 42 | struct backing_dev_info { |
43 | struct list_head bdi_list; | ||
44 | |||
43 | unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ | 45 | unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ |
44 | unsigned long state; /* Always use atomic bitops on this */ | 46 | unsigned long state; /* Always use atomic bitops on this */ |
45 | unsigned int capabilities; /* Device capabilities */ | 47 | unsigned int capabilities; /* Device capabilities */ |
@@ -58,6 +60,10 @@ struct backing_dev_info { | |||
58 | 60 | ||
59 | struct device *dev; | 61 | struct device *dev; |
60 | 62 | ||
63 | struct list_head b_dirty; /* dirty inodes */ | ||
64 | struct list_head b_io; /* parked for writeback */ | ||
65 | struct list_head b_more_io; /* parked for more writeback */ | ||
66 | |||
61 | #ifdef CONFIG_DEBUG_FS | 67 | #ifdef CONFIG_DEBUG_FS |
62 | struct dentry *debug_dir; | 68 | struct dentry *debug_dir; |
63 | struct dentry *debug_stats; | 69 | struct dentry *debug_stats; |
@@ -72,6 +78,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
72 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | 78 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); |
73 | void bdi_unregister(struct backing_dev_info *bdi); | 79 | void bdi_unregister(struct backing_dev_info *bdi); |
74 | 80 | ||
81 | extern struct mutex bdi_lock; | ||
82 | extern struct list_head bdi_list; | ||
83 | |||
75 | static inline void __add_bdi_stat(struct backing_dev_info *bdi, | 84 | static inline void __add_bdi_stat(struct backing_dev_info *bdi, |
76 | enum bdi_stat_item item, s64 amount) | 85 | enum bdi_stat_item item, s64 amount) |
77 | { | 86 | { |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 46ff7dd6e164..56371be1be65 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -715,7 +715,7 @@ struct posix_acl; | |||
715 | 715 | ||
716 | struct inode { | 716 | struct inode { |
717 | struct hlist_node i_hash; | 717 | struct hlist_node i_hash; |
718 | struct list_head i_list; | 718 | struct list_head i_list; /* backing dev IO list */ |
719 | struct list_head i_sb_list; | 719 | struct list_head i_sb_list; |
720 | struct list_head i_dentry; | 720 | struct list_head i_dentry; |
721 | unsigned long i_ino; | 721 | unsigned long i_ino; |
@@ -1336,9 +1336,6 @@ struct super_block { | |||
1336 | struct xattr_handler **s_xattr; | 1336 | struct xattr_handler **s_xattr; |
1337 | 1337 | ||
1338 | struct list_head s_inodes; /* all inodes */ | 1338 | struct list_head s_inodes; /* all inodes */ |
1339 | struct list_head s_dirty; /* dirty inodes */ | ||
1340 | struct list_head s_io; /* parked for writeback */ | ||
1341 | struct list_head s_more_io; /* parked for more writeback */ | ||
1342 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ | 1339 | struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ |
1343 | struct list_head s_files; | 1340 | struct list_head s_files; |
1344 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ | 1341 | /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c86edd244294..6f163e0f0509 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -22,6 +22,8 @@ struct backing_dev_info default_backing_dev_info = { | |||
22 | EXPORT_SYMBOL_GPL(default_backing_dev_info); | 22 | EXPORT_SYMBOL_GPL(default_backing_dev_info); |
23 | 23 | ||
24 | static struct class *bdi_class; | 24 | static struct class *bdi_class; |
25 | DEFINE_MUTEX(bdi_lock); | ||
26 | LIST_HEAD(bdi_list); | ||
25 | 27 | ||
26 | #ifdef CONFIG_DEBUG_FS | 28 | #ifdef CONFIG_DEBUG_FS |
27 | #include <linux/debugfs.h> | 29 | #include <linux/debugfs.h> |
@@ -211,6 +213,10 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
211 | goto exit; | 213 | goto exit; |
212 | } | 214 | } |
213 | 215 | ||
216 | mutex_lock(&bdi_lock); | ||
217 | list_add_tail(&bdi->bdi_list, &bdi_list); | ||
218 | mutex_unlock(&bdi_lock); | ||
219 | |||
214 | bdi->dev = dev; | 220 | bdi->dev = dev; |
215 | bdi_debug_register(bdi, dev_name(dev)); | 221 | bdi_debug_register(bdi, dev_name(dev)); |
216 | 222 | ||
@@ -225,9 +231,17 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) | |||
225 | } | 231 | } |
226 | EXPORT_SYMBOL(bdi_register_dev); | 232 | EXPORT_SYMBOL(bdi_register_dev); |
227 | 233 | ||
234 | static void bdi_remove_from_list(struct backing_dev_info *bdi) | ||
235 | { | ||
236 | mutex_lock(&bdi_lock); | ||
237 | list_del(&bdi->bdi_list); | ||
238 | mutex_unlock(&bdi_lock); | ||
239 | } | ||
240 | |||
228 | void bdi_unregister(struct backing_dev_info *bdi) | 241 | void bdi_unregister(struct backing_dev_info *bdi) |
229 | { | 242 | { |
230 | if (bdi->dev) { | 243 | if (bdi->dev) { |
244 | bdi_remove_from_list(bdi); | ||
231 | bdi_debug_unregister(bdi); | 245 | bdi_debug_unregister(bdi); |
232 | device_unregister(bdi->dev); | 246 | device_unregister(bdi->dev); |
233 | bdi->dev = NULL; | 247 | bdi->dev = NULL; |
@@ -245,6 +259,10 @@ int bdi_init(struct backing_dev_info *bdi) | |||
245 | bdi->min_ratio = 0; | 259 | bdi->min_ratio = 0; |
246 | bdi->max_ratio = 100; | 260 | bdi->max_ratio = 100; |
247 | bdi->max_prop_frac = PROP_FRAC_BASE; | 261 | bdi->max_prop_frac = PROP_FRAC_BASE; |
262 | INIT_LIST_HEAD(&bdi->bdi_list); | ||
263 | INIT_LIST_HEAD(&bdi->b_io); | ||
264 | INIT_LIST_HEAD(&bdi->b_dirty); | ||
265 | INIT_LIST_HEAD(&bdi->b_more_io); | ||
248 | 266 | ||
249 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { | 267 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { |
250 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); | 268 | err = percpu_counter_init(&bdi->bdi_stat[i], 0); |
@@ -259,6 +277,8 @@ int bdi_init(struct backing_dev_info *bdi) | |||
259 | err: | 277 | err: |
260 | while (i--) | 278 | while (i--) |
261 | percpu_counter_destroy(&bdi->bdi_stat[i]); | 279 | percpu_counter_destroy(&bdi->bdi_stat[i]); |
280 | |||
281 | bdi_remove_from_list(bdi); | ||
262 | } | 282 | } |
263 | 283 | ||
264 | return err; | 284 | return err; |
@@ -269,6 +289,10 @@ void bdi_destroy(struct backing_dev_info *bdi) | |||
269 | { | 289 | { |
270 | int i; | 290 | int i; |
271 | 291 | ||
292 | WARN_ON(!list_empty(&bdi->b_dirty)); | ||
293 | WARN_ON(!list_empty(&bdi->b_io)); | ||
294 | WARN_ON(!list_empty(&bdi->b_more_io)); | ||
295 | |||
272 | bdi_unregister(bdi); | 296 | bdi_unregister(bdi); |
273 | 297 | ||
274 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) | 298 | for (i = 0; i < NR_BDI_STAT_ITEMS; i++) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 81627ebcd313..f8341b6019bf 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -320,15 +320,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty) | |||
320 | /* | 320 | /* |
321 | * | 321 | * |
322 | */ | 322 | */ |
323 | static DEFINE_SPINLOCK(bdi_lock); | ||
324 | static unsigned int bdi_min_ratio; | 323 | static unsigned int bdi_min_ratio; |
325 | 324 | ||
326 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | 325 | int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) |
327 | { | 326 | { |
328 | int ret = 0; | 327 | int ret = 0; |
329 | unsigned long flags; | ||
330 | 328 | ||
331 | spin_lock_irqsave(&bdi_lock, flags); | 329 | mutex_lock(&bdi_lock); |
332 | if (min_ratio > bdi->max_ratio) { | 330 | if (min_ratio > bdi->max_ratio) { |
333 | ret = -EINVAL; | 331 | ret = -EINVAL; |
334 | } else { | 332 | } else { |
@@ -340,27 +338,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) | |||
340 | ret = -EINVAL; | 338 | ret = -EINVAL; |
341 | } | 339 | } |
342 | } | 340 | } |
343 | spin_unlock_irqrestore(&bdi_lock, flags); | 341 | mutex_unlock(&bdi_lock); |
344 | 342 | ||
345 | return ret; | 343 | return ret; |
346 | } | 344 | } |
347 | 345 | ||
348 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) | 346 | int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) |
349 | { | 347 | { |
350 | unsigned long flags; | ||
351 | int ret = 0; | 348 | int ret = 0; |
352 | 349 | ||
353 | if (max_ratio > 100) | 350 | if (max_ratio > 100) |
354 | return -EINVAL; | 351 | return -EINVAL; |
355 | 352 | ||
356 | spin_lock_irqsave(&bdi_lock, flags); | 353 | mutex_lock(&bdi_lock); |
357 | if (bdi->min_ratio > max_ratio) { | 354 | if (bdi->min_ratio > max_ratio) { |
358 | ret = -EINVAL; | 355 | ret = -EINVAL; |
359 | } else { | 356 | } else { |
360 | bdi->max_ratio = max_ratio; | 357 | bdi->max_ratio = max_ratio; |
361 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; | 358 | bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100; |
362 | } | 359 | } |
363 | spin_unlock_irqrestore(&bdi_lock, flags); | 360 | mutex_unlock(&bdi_lock); |
364 | 361 | ||
365 | return ret; | 362 | return ret; |
366 | } | 363 | } |