Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c | 90
1 files changed, 33 insertions, 57 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 5930e382959b..a08bb8e61c6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-        struct block_device *bd;
-        struct buffer_head *bh
-                = container_of(word, struct buffer_head, b_state);
-
-        smp_mb();
-        bd = bh->b_bdev;
-        if (bd)
-                blk_run_address_space(bd->bd_inode->i_mapping);
         io_schedule();
         return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-        wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+        wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
                                                 TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-        wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
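
With the queue-kicking removed, the wait action handed to wait_on_bit()/wait_on_bit_lock() only has to put the task to sleep. Below is a minimal sketch of that action-callback pattern against the wait_on_bit() signature of this kernel generation; everything except wait_on_bit(), io_schedule() and TASK_UNINTERRUPTIBLE is an illustrative name, not code from this patch.

#include <linux/wait.h>
#include <linux/sched.h>

/* Action run each time the bit is found still set: just yield the CPU. */
static int example_sleep_action(void *word)
{
        io_schedule();
        return 0;       /* 0 = go back and re-test the bit */
}

/* Sleep until test_bit(bit, word) becomes clear. */
static void example_wait_for_bit(unsigned long *word, int bit)
{
        wait_on_bit(word, bit, example_sleep_action, TASK_UNINTERRUPTIBLE);
}
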
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
         struct buffer_head *bh;
         struct list_head tmp;
-        struct address_space *mapping, *prev_mapping = NULL;
+        struct address_space *mapping;
         int err = 0, err2;
+        struct blk_plug plug;
 
         INIT_LIST_HEAD(&tmp);
+        blk_start_plug(&plug);
 
         spin_lock(lock);
         while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                                  * still in flight on potentially older
                                  * contents.
                                  */
-                                write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+                                write_dirty_buffer(bh, WRITE_SYNC);
 
                                 /*
                                  * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
                                  * wait_on_buffer() will do that for us
                                  * through sync_buffer().
                                  */
-                                if (prev_mapping && prev_mapping != mapping)
-                                        blk_run_address_space(prev_mapping);
-                                prev_mapping = mapping;
-
                                 brelse(bh);
                                 spin_lock(lock);
                         }
                 }
         }
 
+        spin_unlock(lock);
+        blk_finish_plug(&plug);
+        spin_lock(lock);
+
         while (!list_empty(&tmp)) {
                 bh = BH_ENTRY(tmp.prev);
                 get_bh(bh);
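
The explicit blk_plug replaces the old prev_mapping/blk_run_address_space() kicking: requests submitted between blk_start_plug() and blk_finish_plug() are batched per task and pushed to the devices when the plug is finished (or when the task sleeps), which is also why the hunk drops the spinlock around blk_finish_plug(), since flushing may block. A minimal sketch of the pattern, assuming a 2.6.39-era block layer; submit_batch() and the bh array are illustrative.

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/* Submit a batch of dirty buffers, letting the block layer merge and
 * queue them while the on-stack plug is held, then flush them at once. */
static void submit_batch(struct buffer_head **bhs, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);
        for (i = 0; i < nr; i++)
                write_dirty_buffer(bhs[i], WRITE_SYNC);
        blk_finish_plug(&plug); /* pushes the queued requests to the devices */
}
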
@@ -1144,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
@@ -1270,12 +1264,10 @@ static inline void check_irqs_on(void)
 static void bh_lru_install(struct buffer_head *bh)
 {
         struct buffer_head *evictee = NULL;
-        struct bh_lru *lru;
 
         check_irqs_on();
         bh_lru_lock();
-        lru = &__get_cpu_var(bh_lrus);
-        if (lru->bhs[0] != bh) {
+        if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
                 struct buffer_head *bhs[BH_LRU_SIZE];
                 int in;
                 int out = 0;
@@ -1283,7 +1275,8 @@ static void bh_lru_install(struct buffer_head *bh)
                 get_bh(bh);
                 bhs[out++] = bh;
                 for (in = 0; in < BH_LRU_SIZE; in++) {
-                        struct buffer_head *bh2 = lru->bhs[in];
+                        struct buffer_head *bh2 =
+                                __this_cpu_read(bh_lrus.bhs[in]);
 
                         if (bh2 == bh) {
                                 __brelse(bh2);
@@ -1298,7 +1291,7 @@ static void bh_lru_install(struct buffer_head *bh)
                 }
                 while (out < BH_LRU_SIZE)
                         bhs[out++] = NULL;
-                memcpy(lru->bhs, bhs, sizeof(bhs));
+                memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
         }
         bh_lru_unlock();
 
@@ -1313,23 +1306,22 @@ static struct buffer_head *
 lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 {
         struct buffer_head *ret = NULL;
-        struct bh_lru *lru;
         unsigned int i;
 
         check_irqs_on();
         bh_lru_lock();
-        lru = &__get_cpu_var(bh_lrus);
         for (i = 0; i < BH_LRU_SIZE; i++) {
-                struct buffer_head *bh = lru->bhs[i];
+                struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
 
                 if (bh && bh->b_bdev == bdev &&
                                 bh->b_blocknr == block && bh->b_size == size) {
                         if (i) {
                                 while (i) {
-                                        lru->bhs[i] = lru->bhs[i - 1];
+                                        __this_cpu_write(bh_lrus.bhs[i],
+                                                __this_cpu_read(bh_lrus.bhs[i - 1]));
                                         i--;
                                 }
-                                lru->bhs[0] = bh;
+                                __this_cpu_write(bh_lrus.bhs[0], bh);
                         }
                         get_bh(bh);
                         ret = bh;
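
These hunks drop the pointer obtained from __get_cpu_var() in favour of __this_cpu_read()/__this_cpu_write() on individual fields, which the per-cpu code can turn into single per-cpu-addressed instructions on x86 instead of holding a long-lived pointer to this CPU's data. A sketch of the idiom on a made-up per-CPU structure; my_cache and its fields are illustrative, and the preempt_disable() stands in for bh_lru_lock(), which keeps the real code on one CPU.

#include <linux/percpu.h>
#include <linux/preempt.h>

struct my_cache {
        void *slot[8];
        unsigned long hits;
};
static DEFINE_PER_CPU(struct my_cache, my_cache);

/* Promote entry i of this CPU's cache to the front, LRU-style. */
static void *my_cache_promote(int i)
{
        void *obj;

        preempt_disable();              /* __this_cpu_* need a stable CPU */
        obj = __this_cpu_read(my_cache.slot[i]);
        while (i) {                     /* shift entries 0..i-1 down by one */
                __this_cpu_write(my_cache.slot[i],
                                 __this_cpu_read(my_cache.slot[i - 1]));
                i--;
        }
        __this_cpu_write(my_cache.slot[0], obj);
        __this_cpu_inc(my_cache.hits);
        preempt_enable();
        return obj;
}
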
@@ -1616,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed.  The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
                         get_block_t *get_block, struct writeback_control *wbc,
@@ -1636,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         const unsigned blocksize = 1 << inode->i_blkbits;
         int nr_underway = 0;
         int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-                        WRITE_SYNC_PLUG : WRITE);
+                        WRITE_SYNC : WRITE);
 
         BUG_ON(!PageLocked(page));
 
@@ -3140,17 +3126,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-        struct address_space *mapping;
-
-        smp_mb();
-        mapping = page_mapping(page);
-        if (mapping)
-                blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left.  But distributions are
  * still running obsolete flush daemons, so we terminate them here.
@@ -3203,22 +3178,23 @@ static void recalc_bh_state(void)
         int i;
         int tot = 0;
 
-        if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
+        if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
                 return;
-        __get_cpu_var(bh_accounting).ratelimit = 0;
+        __this_cpu_write(bh_accounting.ratelimit, 0);
         for_each_online_cpu(i)
                 tot += per_cpu(bh_accounting, i).nr;
         buffer_heads_over_limit = (tot > max_buffer_heads);
 }
 
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
         struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
         if (ret) {
                 INIT_LIST_HEAD(&ret->b_assoc_buffers);
-                get_cpu_var(bh_accounting).nr++;
+                preempt_disable();
+                __this_cpu_inc(bh_accounting.nr);
                 recalc_bh_state();
-                put_cpu_var(bh_accounting);
+                preempt_enable();
         }
         return ret;
 }
@@ -3228,9 +3204,10 @@ void free_buffer_head(struct buffer_head *bh)
 {
         BUG_ON(!list_empty(&bh->b_assoc_buffers));
         kmem_cache_free(bh_cachep, bh);
-        get_cpu_var(bh_accounting).nr--;
+        preempt_disable();
+        __this_cpu_dec(bh_accounting.nr);
         recalc_bh_state();
-        put_cpu_var(bh_accounting);
+        preempt_enable();
 }
 EXPORT_SYMBOL(free_buffer_head);
 
@@ -3243,9 +3220,8 @@ static void buffer_exit_cpu(int cpu)
                 brelse(b->bhs[i]);
                 b->bhs[i] = NULL;
         }
-        get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+        this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
         per_cpu(bh_accounting, cpu).nr = 0;
-        put_cpu_var(bh_accounting);
 }
 
 static int buffer_cpu_notify(struct notifier_block *self,
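
The last three hunks replace get_cpu_var()/put_cpu_var() around the bh_accounting counters. The old pair disabled preemption and returned this CPU's instance; the new code keeps an explicit preempt_disable() only where recalc_bh_state() must run on a stable CPU, and does the arithmetic itself with __this_cpu_inc()/__this_cpu_dec()/this_cpu_add(). A sketch of the three forms on an illustrative per-CPU counter; nothing below is from fs/buffer.c.

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(long, example_nr);

/* Old idiom: get_cpu_var() disables preemption, put_cpu_var() re-enables. */
static void count_old(long delta)
{
        get_cpu_var(example_nr) += delta;
        put_cpu_var(example_nr);
}

/* New idiom: disable preemption only around the section that needs a
 * stable CPU, and let __this_cpu_add() do the per-cpu addressing. */
static void count_new(long delta)
{
        preempt_disable();
        __this_cpu_add(example_nr, delta);
        preempt_enable();
}

/* this_cpu_add() is the preemption-safe variant: no explicit disabling. */
static void count_fast(long delta)
{
        this_cpu_add(example_nr, delta);
}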