 -rw-r--r--  fs/fs-writeback.c            | 197
 -rw-r--r--  fs/super.c                   |   3
 -rw-r--r--  include/linux/backing-dev.h  |   9
 -rw-r--r--  include/linux/fs.h           |   5
 -rw-r--r--  mm/backing-dev.c             |  24
 -rw-r--r--  mm/page-writeback.c          |  11
 6 files changed, 165 insertions(+), 84 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 271e5f44e871..45ad4bb700e6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,7 @@
 #include <linux/buffer_head.h>
 #include "internal.h"
 
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
 
 /**
  * writeback_acquire - attempt to get exclusive writeback access to a device
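The new inode_to_bdi() helper is pure pointer chasing: an inode reaches its backing device through its address_space. A compilable userspace sketch with stub types (the struct layouts here are illustrative stand-ins, not the kernel's real definitions):

#include <stdio.h>

/* Stub types mirroring only the fields the macro touches. */
struct backing_dev_info { const char *name; };
struct address_space { struct backing_dev_info *backing_dev_info; };
struct inode { struct address_space *i_mapping; };

/* Same shape as the patch's helper: inode -> mapping -> bdi. */
#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)

int main(void)
{
	struct backing_dev_info bdi = { .name = "sda" };
	struct address_space mapping = { .backing_dev_info = &bdi };
	struct inode ino = { .i_mapping = &mapping };

	/* Dirtying code can now find the per-device list heads directly. */
	printf("inode backed by %s\n", inode_to_bdi(&ino)->name);
	return 0;
}

Every hunk below that replaces an sb->s_* list with inode_to_bdi(inode)->b_* relies on this one level of indirection.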
@@ -165,12 +166,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			goto out;
 
 		/*
-		 * If the inode was already on s_dirty/s_io/s_more_io, don't
-		 * reposition it (that would break s_dirty time-ordering).
+		 * If the inode was already on b_dirty/b_io/b_more_io, don't
+		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &sb->s_dirty);
+			list_move(&inode->i_list,
+					&inode_to_bdi(inode)->b_dirty);
 		}
 	}
 out:
@@ -191,31 +193,30 @@ static int write_inode(struct inode *inode, int sync)
 * furthest end of its superblock's dirty-inode list.
 *
 * Before stamping the inode's ->dirtied_when, we check to see whether it is
- * already the most-recently-dirtied inode on the s_dirty list.  If that is
+ * already the most-recently-dirtied inode on the b_dirty list.  If that is
 * the case then the inode must have been redirtied while it was being written
 * out and we don't reset its dirtied_when.
 */
 static void redirty_tail(struct inode *inode)
 {
-	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = inode_to_bdi(inode);
 
-	if (!list_empty(&sb->s_dirty)) {
-		struct inode *tail_inode;
+	if (!list_empty(&bdi->b_dirty)) {
+		struct inode *tail;
 
-		tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
-		if (time_before(inode->dirtied_when,
-				tail_inode->dirtied_when))
+		tail = list_entry(bdi->b_dirty.next, struct inode, i_list);
+		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	list_move(&inode->i_list, &sb->s_dirty);
+	list_move(&inode->i_list, &bdi->b_dirty);
 }
 
 /*
- * requeue inode for re-scanning after sb->s_io list is exhausted.
+ * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
 static void requeue_io(struct inode *inode)
 {
-	list_move(&inode->i_list, &inode->i_sb->s_more_io);
+	list_move(&inode->i_list, &inode_to_bdi(inode)->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
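redirty_tail() preserves an ordering invariant: b_dirty is kept sorted by dirtied_when, with the most recently dirtied inode at the list head. If a requeued inode would land there carrying a timestamp older than the entry already at the head, it is restamped with the current time, so the expiry scan stays monotonic. A toy version of just that rule, with plain integers standing in for jiffies (the kernel's time_before() also handles jiffies wraparound, which this sketch ignores):

#include <stdio.h>

/* Return the timestamp the requeued inode should keep. */
static unsigned long toy_redirty(unsigned long dirtied_when,
				 unsigned long head_dirtied_when,
				 unsigned long now)
{
	if (dirtied_when < head_dirtied_when)	/* time_before() */
		return now;			/* restamp to preserve order */
	return dirtied_when;
}

int main(void)
{
	/* inode dirtied at t=100, requeued while the head was dirtied at t=250 */
	printf("%lu\n", toy_redirty(100, 250, 300));	/* 300: restamped */
	printf("%lu\n", toy_redirty(250, 250, 300));	/* 250: kept */
	return 0;
}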
@@ -262,18 +263,50 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 /*
 * Queue all expired dirty inodes for io, eldest first.
 */
-static void queue_io(struct super_block *sb,
-				unsigned long *older_than_this)
+static void queue_io(struct backing_dev_info *bdi,
+		     unsigned long *older_than_this)
+{
+	list_splice_init(&bdi->b_more_io, bdi->b_io.prev);
+	move_expired_inodes(&bdi->b_dirty, &bdi->b_io, older_than_this);
+}
+
+static int sb_on_inode_list(struct super_block *sb, struct list_head *list)
 {
-	list_splice_init(&sb->s_more_io, sb->s_io.prev);
-	move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
+	struct inode *inode;
+	int ret = 0;
+
+	spin_lock(&inode_lock);
+	list_for_each_entry(inode, list, i_list) {
+		if (inode->i_sb == sb) {
+			ret = 1;
+			break;
+		}
+	}
+	spin_unlock(&inode_lock);
+	return ret;
 }
 
 int sb_has_dirty_inodes(struct super_block *sb)
 {
-	return !list_empty(&sb->s_dirty) ||
-	       !list_empty(&sb->s_io) ||
-	       !list_empty(&sb->s_more_io);
+	struct backing_dev_info *bdi;
+	int ret = 0;
+
+	/*
+	 * This is REALLY expensive right now, but it'll go away
+	 * when the bdi writeback is introduced
+	 */
+	mutex_lock(&bdi_lock);
+	list_for_each_entry(bdi, &bdi_list, bdi_list) {
+		if (sb_on_inode_list(sb, &bdi->b_dirty) ||
+		    sb_on_inode_list(sb, &bdi->b_io) ||
+		    sb_on_inode_list(sb, &bdi->b_more_io)) {
+			ret = 1;
+			break;
+		}
+	}
+	mutex_unlock(&bdi_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(sb_has_dirty_inodes);
 
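The comment above is candid about the cost: with dirty lists now per-bdi, sb_has_dirty_inodes() must take bdi_lock, walk every registered bdi, and scan three inode lists per bdi under inode_lock. A toy model of the resulting O(bdis x inodes) search (hypothetical data, illustration only):

#include <stdbool.h>
#include <stdio.h>

#define NBDI 3
#define NINODE 4

int main(void)
{
	/* superblock id owning each queued inode, per bdi */
	int owner[NBDI][NINODE] = {
		{ 1, 1, 2, 2 },
		{ 3, 3, 3, 3 },
		{ 2, 4, 4, 4 },
	};
	int sb = 4;
	bool dirty = false;

	/* every bdi, every inode, until a match for sb is found */
	for (int b = 0; b < NBDI && !dirty; b++)
		for (int i = 0; i < NINODE; i++)
			if (owner[b][i] == sb) {
				dirty = true;
				break;
			}

	printf("sb %d has dirty inodes: %s\n", sb, dirty ? "yes" : "no");
	return 0;
}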
@@ -322,11 +355,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	if (inode->i_state & I_SYNC) {
 		/*
 		 * If this inode is locked for writeback and we are not doing
-		 * writeback-for-data-integrity, move it to s_more_io so that
+		 * writeback-for-data-integrity, move it to b_more_io so that
 		 * writeback can proceed with the other inodes on s_io.
 		 *
 		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
+		 * completed a full scan of b_io.
 		 */
 		if (!wait) {
 			requeue_io(inode);
@@ -371,11 +404,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		/*
 		 * We didn't write back all the pages.  nfs_writepages()
 		 * sometimes bales out without doing anything. Redirty
-		 * the inode; Move it from s_io onto s_more_io/s_dirty.
+		 * the inode; Move it from b_io onto b_more_io/b_dirty.
 		 */
 		/*
 		 * akpm: if the caller was the kupdate function we put
-		 * this inode at the head of s_dirty so it gets first
+		 * this inode at the head of b_dirty so it gets first
 		 * consideration.  Otherwise, move it to the tail, for
 		 * the reasons described there.  I'm not really sure
 		 * how much sense this makes.  Presumably I had a good
@@ -385,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			if (wbc->for_kupdate) {
 				/*
 				 * For the kupdate function we move the inode
-				 * to s_more_io so it will get more writeout as
+				 * to b_more_io so it will get more writeout as
 				 * soon as the queue becomes uncongested.
 				 */
 				inode->i_state |= I_DIRTY_PAGES;
@@ -433,51 +466,34 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-/*
- * Write out a superblock's list of dirty inodes.  A wait will be performed
- * upon no inodes, all inodes or the final one, depending upon sync_mode.
- *
- * If older_than_this is non-NULL, then only write out inodes which
- * had their first dirtying at a time earlier than *older_than_this.
- *
- * If we're a pdflush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
- * If `bdi' is non-zero then we're being asked to writeback a specific queue.
- * This function assumes that the blockdev superblock's inodes are backed by
- * a variety of queues, so all inodes are searched.  For other superblocks,
- * assume that all inodes are backed by the same queue.
- *
- * FIXME: this linear search could get expensive with many filesystems.  But
- * how to fix?  We need to go from an address_space to all inodes which share
- * a queue with that address_space.  (Easy: have a global "dirty superblocks"
- * list).
- *
- * The inodes to be written are parked on sb->s_io.  They are moved back onto
- * sb->s_dirty as they are selected for writing.  This way, none can be missed
- * on the writer throttling path, and we get decent balancing between many
- * throttled threads: we don't want them all piling up on inode_sync_wait.
- */
-static void generic_sync_sb_inodes(struct super_block *sb,
-				   struct writeback_control *wbc)
+static void generic_sync_bdi_inodes(struct backing_dev_info *bdi,
+				    struct writeback_control *wbc,
+				    struct super_block *sb)
 {
+	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
 	const unsigned long start = jiffies;	/* livelock avoidance */
-	int sync = wbc->sync_mode == WB_SYNC_ALL;
 
 	spin_lock(&inode_lock);
-	if (!wbc->for_kupdate || list_empty(&sb->s_io))
-		queue_io(sb, wbc->older_than_this);
 
-	while (!list_empty(&sb->s_io)) {
-		struct inode *inode = list_entry(sb->s_io.prev,
+	if (!wbc->for_kupdate || list_empty(&bdi->b_io))
+		queue_io(bdi, wbc->older_than_this);
+
+	while (!list_empty(&bdi->b_io)) {
+		struct inode *inode = list_entry(bdi->b_io.prev,
 						struct inode, i_list);
-		struct address_space *mapping = inode->i_mapping;
-		struct backing_dev_info *bdi = mapping->backing_dev_info;
 		long pages_skipped;
 
+		/*
+		 * super block given and doesn't match, skip this inode
+		 */
+		if (sb && sb != inode->i_sb) {
+			redirty_tail(inode);
+			continue;
+		}
+
 		if (!bdi_cap_writeback_dirty(bdi)) {
 			redirty_tail(inode);
-			if (sb_is_blkdev_sb(sb)) {
+			if (is_blkdev_sb) {
 				/*
 				 * Dirty memory-backed blockdev: the ramdisk
 				 * driver does this.  Skip just this inode
@@ -499,14 +515,14 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
-			if (!sb_is_blkdev_sb(sb))
+			if (!is_blkdev_sb)
 				break;		/* Skip a congested fs */
 			requeue_io(inode);
 			continue;	/* Skip a congested blockdev */
 		}
 
 		if (wbc->bdi && bdi != wbc->bdi) {
-			if (!sb_is_blkdev_sb(sb))
+			if (!is_blkdev_sb)
 				break;		/* fs has the wrong queue */
 			requeue_io(inode);
 			continue;	/* blockdev has wrong queue */
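Because one bdi can back inodes from several superblocks, the rewritten scan loop in generic_sync_bdi_inodes() has to filter: when a target superblock is given, inodes belonging to other superblocks are pushed back to b_dirty via redirty_tail() rather than written. A minimal sketch of that consume-with-filter shape (toy types and data, not kernel code):

#include <stdio.h>

struct toy_inode { int ino; int sb; };

int main(void)
{
	/* a per-device b_io queue mixing inodes from three superblocks */
	struct toy_inode b_io[] = { {10, 1}, {11, 2}, {12, 1}, {13, 3} };
	int want_sb = 1;	/* caller asked for this superblock only */

	for (unsigned i = 0; i < sizeof(b_io) / sizeof(b_io[0]); i++) {
		if (b_io[i].sb != want_sb) {
			printf("inode %d: other sb, redirty\n", b_io[i].ino);
			continue;
		}
		printf("inode %d: writeback\n", b_io[i].ino);
	}
	return 0;
}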
@@ -544,13 +560,57 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 			wbc->more_io = 1;
 			break;
 		}
-		if (!list_empty(&sb->s_more_io))
+		if (!list_empty(&bdi->b_more_io))
 			wbc->more_io = 1;
 	}
 
-	if (sync) {
+	spin_unlock(&inode_lock);
+	/* Leave any unwritten inodes on b_io */
+}
+
+/*
+ * Write out a superblock's list of dirty inodes.  A wait will be performed
+ * upon no inodes, all inodes or the final one, depending upon sync_mode.
+ *
+ * If older_than_this is non-NULL, then only write out inodes which
+ * had their first dirtying at a time earlier than *older_than_this.
+ *
+ * If we're a pdflush thread, then implement pdflush collision avoidance
+ * against the entire list.
+ *
+ * If `bdi' is non-zero then we're being asked to writeback a specific queue.
+ * This function assumes that the blockdev superblock's inodes are backed by
+ * a variety of queues, so all inodes are searched.  For other superblocks,
+ * assume that all inodes are backed by the same queue.
+ *
+ * FIXME: this linear search could get expensive with many filesystems.  But
+ * how to fix?  We need to go from an address_space to all inodes which share
+ * a queue with that address_space.  (Easy: have a global "dirty superblocks"
+ * list).
+ *
+ * The inodes to be written are parked on bdi->b_io.  They are moved back onto
+ * bdi->b_dirty as they are selected for writing.  This way, none can be missed
+ * on the writer throttling path, and we get decent balancing between many
+ * throttled threads: we don't want them all piling up on inode_sync_wait.
+ */
+static void generic_sync_sb_inodes(struct super_block *sb,
+				   struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi;
+
+	if (!wbc->bdi) {
+		mutex_lock(&bdi_lock);
+		list_for_each_entry(bdi, &bdi_list, bdi_list)
+			generic_sync_bdi_inodes(bdi, wbc, sb);
+		mutex_unlock(&bdi_lock);
+	} else
+		generic_sync_bdi_inodes(wbc->bdi, wbc, sb);
+
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		struct inode *inode, *old_inode = NULL;
 
+		spin_lock(&inode_lock);
+
 		/*
 		 * Data integrity sync. Must wait for all pages under writeback,
 		 * because there may have been pages dirtied before our sync
@@ -588,10 +648,7 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 		}
 		spin_unlock(&inode_lock);
 		iput(old_inode);
-	} else
-		spin_unlock(&inode_lock);
-
-	return;		/* Leave any unwritten inodes on s_io */
+	}
 }
 
 /*
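After the split, generic_sync_sb_inodes() is a thin dispatcher: with no target bdi in the writeback_control it fans out over every registered bdi under bdi_lock, otherwise it syncs only the requested one; WB_SYNC_ALL callers then wait on every inode still under writeback. A skeletal userspace rendering of that control flow (all names and types below are illustrative stand-ins):

#include <stdio.h>

struct bdi { const char *name; struct bdi *next; };
struct wbc { struct bdi *bdi; int sync_all; };

static void sync_bdi(struct bdi *bdi)
{
	printf("writeback on %s\n", bdi->name);
}

static void sync_sb_inodes(struct bdi *bdi_list, struct wbc *wbc)
{
	if (!wbc->bdi) {
		/* no target device: walk the global bdi list (under bdi_lock) */
		for (struct bdi *bdi = bdi_list; bdi; bdi = bdi->next)
			sync_bdi(bdi);
	} else {
		sync_bdi(wbc->bdi);	/* targeted writeback for one queue */
	}
	if (wbc->sync_all)
		printf("WB_SYNC_ALL: wait on every inode under writeback\n");
}

int main(void)
{
	struct bdi b = { "sdb", NULL }, a = { "sda", &b };
	struct wbc all = { .bdi = NULL, .sync_all = 1 };

	sync_sb_inodes(&a, &all);
	return 0;
}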
@@ -599,8 +656,8 @@ static void generic_sync_sb_inodes(struct super_block *sb,
 *
 * Note:
 * We don't need to grab a reference to superblock here. If it has non-empty
- * ->s_dirty it hasn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
+ * ->b_dirty it hasn't been killed yet and kill_super() won't proceed
+ * past sync_inodes_sb() until the ->b_dirty/b_io/b_more_io lists are all
 * empty. Since __sync_single_inode() regains inode_lock before it finally moves
 * inode from superblock lists we are OK.
 *
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..0d22ce3be4aa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		s = NULL;
 		goto out;
 	}
-	INIT_LIST_HEAD(&s->s_dirty);
-	INIT_LIST_HEAD(&s->s_io);
-	INIT_LIST_HEAD(&s->s_more_io);
 	INIT_LIST_HEAD(&s->s_files);
 	INIT_LIST_HEAD(&s->s_instances);
 	INIT_HLIST_HEAD(&s->s_anon);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1d52425a6118..928cd5484f4d 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -40,6 +40,8 @@ enum bdi_stat_item {
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
 struct backing_dev_info {
+	struct list_head bdi_list;
+
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -58,6 +60,10 @@ struct backing_dev_info {
 
 	struct device *dev;
 
+	struct list_head b_dirty;	/* dirty inodes */
+	struct list_head b_io;		/* parked for writeback */
+	struct list_head b_more_io;	/* parked for more writeback */
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
@@ -72,6 +78,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
 
+extern struct mutex bdi_lock;
+extern struct list_head bdi_list;
+
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
 		enum bdi_stat_item item, s64 amount)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 46ff7dd6e164..56371be1be65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -715,7 +715,7 @@ struct posix_acl;
 
 struct inode {
 	struct hlist_node	i_hash;
-	struct list_head	i_list;
+	struct list_head	i_list;		/* backing dev IO list */
 	struct list_head	i_sb_list;
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
@@ -1336,9 +1336,6 @@ struct super_block {
 	struct xattr_handler	**s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
-	struct list_head	s_dirty;	/* dirty inodes */
-	struct list_head	s_io;		/* parked for writeback */
-	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c86edd244294..6f163e0f0509 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -22,6 +22,8 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+DEFINE_MUTEX(bdi_lock);
+LIST_HEAD(bdi_list);
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -211,6 +213,10 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		goto exit;
 	}
 
+	mutex_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_list);
+	mutex_unlock(&bdi_lock);
+
 	bdi->dev = dev;
 	bdi_debug_register(bdi, dev_name(dev));
 
@@ -225,9 +231,17 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
 }
 EXPORT_SYMBOL(bdi_register_dev);
 
+static void bdi_remove_from_list(struct backing_dev_info *bdi)
+{
+	mutex_lock(&bdi_lock);
+	list_del(&bdi->bdi_list);
+	mutex_unlock(&bdi_lock);
+}
+
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		bdi_remove_from_list(bdi);
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -245,6 +259,10 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
+	INIT_LIST_HEAD(&bdi->bdi_list);
+	INIT_LIST_HEAD(&bdi->b_io);
+	INIT_LIST_HEAD(&bdi->b_dirty);
+	INIT_LIST_HEAD(&bdi->b_more_io);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -259,6 +277,8 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+
+		bdi_remove_from_list(bdi);
 	}
 
 	return err;
@@ -269,6 +289,10 @@ void bdi_destroy(struct backing_dev_info *bdi)
 {
 	int i;
 
+	WARN_ON(!list_empty(&bdi->b_dirty));
+	WARN_ON(!list_empty(&bdi->b_io));
+	WARN_ON(!list_empty(&bdi->b_more_io));
+
 	bdi_unregister(bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
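The lifetime rules added here are simple: a bdi joins the global bdi_list under bdi_lock when it registers, and leaves under the same lock on unregister or on a failed init, so the writeback path's list walk never observes a half-torn-down entry. A userspace sketch of that discipline with a pthread mutex and a singly linked list (illustrative only, not the kernel's list API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bdi_lock = PTHREAD_MUTEX_INITIALIZER;

struct bdi { const char *name; struct bdi *next; };
static struct bdi *bdi_list;

static void bdi_register(struct bdi *bdi)
{
	pthread_mutex_lock(&bdi_lock);
	bdi->next = bdi_list;	/* link in while holding the lock */
	bdi_list = bdi;
	pthread_mutex_unlock(&bdi_lock);
}

static void bdi_unregister(struct bdi *bdi)
{
	pthread_mutex_lock(&bdi_lock);
	for (struct bdi **p = &bdi_list; *p; p = &(*p)->next)
		if (*p == bdi) {
			*p = bdi->next;	/* unlink under the same lock */
			break;
		}
	pthread_mutex_unlock(&bdi_lock);
}

int main(void)
{
	struct bdi a = { "sda", NULL };

	bdi_register(&a);
	bdi_unregister(&a);
	printf("list empty: %s\n", bdi_list == NULL ? "yes" : "no");
	return 0;
}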
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627ebcd313..f8341b6019bf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -320,15 +320,13 @@ static void task_dirty_limit(struct task_struct *tsk, unsigned long *pdirty)
 /*
 *
 */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	mutex_lock(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -340,27 +338,26 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	mutex_unlock(&bdi_lock);
 
 	return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	mutex_lock(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	mutex_unlock(&bdi_lock);
 
 	return ret;
 }
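bdi_lock was previously a file-local spinlock in mm/page-writeback.c; it is now the global mutex, since the same lock must also cover the bdi_list walks in the writeback path, which can block. The ratio arithmetic itself is unchanged: a device's minimum may not exceed its maximum, and the sum of all per-device minimums must stay under 100%. Reduced to plain C (locking elided, names illustrative):

#include <errno.h>
#include <stdio.h>

struct bdi { unsigned min_ratio, max_ratio; };
static unsigned bdi_min_ratio;	/* sum of all per-bdi minimums */

static int set_min_ratio(struct bdi *bdi, unsigned min_ratio)
{
	if (min_ratio > bdi->max_ratio)
		return -EINVAL;		/* min may not exceed max */
	min_ratio -= bdi->min_ratio;	/* delta against current setting */
	if (bdi_min_ratio + min_ratio >= 100)
		return -EINVAL;		/* global budget exhausted */
	bdi_min_ratio += min_ratio;
	bdi->min_ratio += min_ratio;
	return 0;
}

int main(void)
{
	struct bdi bdi = { .min_ratio = 0, .max_ratio = 100 };

	printf("%d\n", set_min_ratio(&bdi, 10));	/* 0: accepted */
	printf("%d\n", set_min_ratio(&bdi, 95));	/* 0: delta 85, total 95 */
	return 0;
}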
