Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c  90
1 file changed, 33 insertions(+), 57 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 5930e382959b..a08bb8e61c6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-	struct block_device *bd;
-	struct buffer_head *bh
-		= container_of(word, struct buffer_head, b_state);
-
-	smp_mb();
-	bd = bh->b_bdev;
-	if (bd)
-		blk_run_address_space(bd->bd_inode->i_mapping);
 	io_schedule();
 	return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-	wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping, *prev_mapping = NULL;
+	struct address_space *mapping;
 	int err = 0, err2;
+	struct blk_plug plug;
 
 	INIT_LIST_HEAD(&tmp);
+	blk_start_plug(&plug);
 
 	spin_lock(lock);
 	while (!list_empty(list)) {
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * still in flight on potentially older
 				 * contents.
 				 */
-				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+				write_dirty_buffer(bh, WRITE_SYNC);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * wait_on_buffer() will do that for us
 				 * through sync_buffer().
 				 */
-				if (prev_mapping && prev_mapping != mapping)
-					blk_run_address_space(prev_mapping);
-				prev_mapping = mapping;
-
 				brelse(bh);
 				spin_lock(lock);
 			}
 		}
 	}
 
+	spin_unlock(lock);
+	blk_finish_plug(&plug);
+	spin_lock(lock);
+
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		get_bh(bh);
@@ -1144,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
@@ -1270,12 +1264,10 @@ static inline void check_irqs_on(void)
 static void bh_lru_install(struct buffer_head *bh)
 {
 	struct buffer_head *evictee = NULL;
-	struct bh_lru *lru;
 
 	check_irqs_on();
 	bh_lru_lock();
-	lru = &__get_cpu_var(bh_lrus);
-	if (lru->bhs[0] != bh) {
+	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
 		struct buffer_head *bhs[BH_LRU_SIZE];
 		int in;
 		int out = 0;
@@ -1283,7 +1275,8 @@ static void bh_lru_install(struct buffer_head *bh)
 		get_bh(bh);
 		bhs[out++] = bh;
 		for (in = 0; in < BH_LRU_SIZE; in++) {
-			struct buffer_head *bh2 = lru->bhs[in];
+			struct buffer_head *bh2 =
+				__this_cpu_read(bh_lrus.bhs[in]);
 
 			if (bh2 == bh) {
 				__brelse(bh2);
@@ -1298,7 +1291,7 @@ static void bh_lru_install(struct buffer_head *bh)
 		}
 		while (out < BH_LRU_SIZE)
 			bhs[out++] = NULL;
-		memcpy(lru->bhs, bhs, sizeof(bhs));
+		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
 	}
 	bh_lru_unlock();
 
@@ -1313,23 +1306,22 @@ static struct buffer_head *
 lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 {
 	struct buffer_head *ret = NULL;
-	struct bh_lru *lru;
 	unsigned int i;
 
 	check_irqs_on();
 	bh_lru_lock();
-	lru = &__get_cpu_var(bh_lrus);
 	for (i = 0; i < BH_LRU_SIZE; i++) {
-		struct buffer_head *bh = lru->bhs[i];
+		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
 
 		if (bh && bh->b_bdev == bdev &&
 				bh->b_blocknr == block && bh->b_size == size) {
 			if (i) {
 				while (i) {
-					lru->bhs[i] = lru->bhs[i - 1];
+					__this_cpu_write(bh_lrus.bhs[i],
+						__this_cpu_read(bh_lrus.bhs[i - 1]));
 					i--;
 				}
-				lru->bhs[0] = bh;
+				__this_cpu_write(bh_lrus.bhs[0], bh);
 			}
 			get_bh(bh);
 			ret = bh;
@@ -1616,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed. The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
 			get_block_t *get_block, struct writeback_control *wbc,
@@ -1636,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE);
+			WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -3140,17 +3126,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left. But distributions are
  * still running obsolete flush daemons, so we terminate them here.
@@ -3203,22 +3178,23 @@ static void recalc_bh_state(void)
 	int i;
 	int tot = 0;
 
-	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
+	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
 		return;
-	__get_cpu_var(bh_accounting).ratelimit = 0;
+	__this_cpu_write(bh_accounting.ratelimit, 0);
 	for_each_online_cpu(i)
 		tot += per_cpu(bh_accounting, i).nr;
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
 
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
 	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
-		get_cpu_var(bh_accounting).nr++;
+		preempt_disable();
+		__this_cpu_inc(bh_accounting.nr);
 		recalc_bh_state();
-		put_cpu_var(bh_accounting);
+		preempt_enable();
 	}
 	return ret;
 }
@@ -3228,9 +3204,10 @@ void free_buffer_head(struct buffer_head *bh)
 {
 	BUG_ON(!list_empty(&bh->b_assoc_buffers));
 	kmem_cache_free(bh_cachep, bh);
-	get_cpu_var(bh_accounting).nr--;
+	preempt_disable();
+	__this_cpu_dec(bh_accounting.nr);
 	recalc_bh_state();
-	put_cpu_var(bh_accounting);
+	preempt_enable();
 }
 EXPORT_SYMBOL(free_buffer_head);
 
@@ -3243,9 +3220,8 @@ static void buffer_exit_cpu(int cpu)
 		brelse(b->bhs[i]);
 		b->bhs[i] = NULL;
 	}
-	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
 	per_cpu(bh_accounting, cpu).nr = 0;
-	put_cpu_var(bh_accounting);
 }
 
 static int buffer_cpu_notify(struct notifier_block *self,
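
Note on the pattern the diff adopts: the removed blk_run_address_space()/block_sync_page() unplug hooks are replaced by explicit on-stack plugging around batched submission, as in the fsync_buffers_list() hunks above. The following is a minimal illustrative sketch of that pattern only; the helper name and its arguments are hypothetical and not part of the patch.

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/*
 * Hypothetical helper, for illustration only: batch a set of buffer
 * writes under one on-stack plug, the way fsync_buffers_list() now does.
 */
static void write_buffers_plugged(struct buffer_head **bhs, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);	/* hold submitted requests on the task's plug list */
	for (i = 0; i < nr; i++)
		write_dirty_buffer(bhs[i], WRITE_SYNC);
	blk_finish_plug(&plug);	/* flush the batched requests to the block layer */
}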