Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c  195
1 files changed, 93 insertions, 102 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 3e7dca279d1c..1a80b048ade8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/cleancache.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -54,23 +55,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
-static int sync_buffer(void *word)
+static int sleep_on_buffer(void *word)
 {
-	struct block_device *bd;
-	struct buffer_head *bh
-		= container_of(word, struct buffer_head, b_state);
-
-	smp_mb();
-	bd = bh->b_bdev;
-	if (bd)
-		blk_run_address_space(bd->bd_inode->i_mapping);
 	io_schedule();
 	return 0;
 }
 
 void __lock_buffer(struct buffer_head *bh)
 {
-	wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer,
+	wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
 							TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
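
For reference, wait_on_bit() and wait_on_bit_lock() in this kernel take an action callback that decides how the caller sleeps while the bit is set; the rename above reflects that the callback no longer kicks a request queue, it only schedules. A minimal, hedged sketch of the callback pattern follows, with illustrative names (my_bit_action, my_wait_for_bit and MY_BIT are not from this patch; wait_on_bit(), io_schedule() and TASK_UNINTERRUPTIBLE are real kernel interfaces):

	/* Illustrative sketch: wiring an action callback into wait_on_bit().
	 * my_bit_action()/my_wait_for_bit()/MY_BIT are made-up names.
	 */
	static int my_bit_action(void *word)
	{
		io_schedule();	/* just sleep; no queue unplugging needed */
		return 0;	/* 0 = go back and re-check the bit */
	}

	static void my_wait_for_bit(unsigned long *flags)
	{
		wait_on_bit(flags, MY_BIT, my_bit_action, TASK_UNINTERRUPTIBLE);
	}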
@@ -90,7 +83,7 @@ EXPORT_SYMBOL(unlock_buffer);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-	wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
@@ -156,7 +149,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -277,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev)
 	invalidate_bh_lrus();
 	lru_add_drain_all();	/* make sure all lru add caches are flushed */
 	invalidate_mapping_pages(mapping, 0, -1);
+	/* 99% of the time, we don't need to flush the cleancache on the bdev.
+	 * But, for the strange corners, lets be cautious
+	 */
+	cleancache_flush_inode(mapping);
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
@@ -749,10 +746,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping, *prev_mapping = NULL;
+	struct address_space *mapping;
 	int err = 0, err2;
+	struct blk_plug plug;
 
 	INIT_LIST_HEAD(&tmp);
+	blk_start_plug(&plug);
 
 	spin_lock(lock);
 	while (!list_empty(list)) {
@@ -775,7 +774,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * still in flight on potentially older
 				 * contents.
 				 */
-				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
+				write_dirty_buffer(bh, WRITE_SYNC);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -783,16 +782,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * wait_on_buffer() will do that for us
 				 * through sync_buffer().
 				 */
-				if (prev_mapping && prev_mapping != mapping)
-					blk_run_address_space(prev_mapping);
-				prev_mapping = mapping;
-
 				brelse(bh);
 				spin_lock(lock);
 			}
 		}
 	}
 
+	spin_unlock(lock);
+	blk_finish_plug(&plug);
+	spin_lock(lock);
+
 	while (!list_empty(&tmp)) {
 		bh = BH_ENTRY(tmp.prev);
 		get_bh(bh);
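
The removed prev_mapping/blk_run_address_space() bookkeeping is replaced by on-stack block plugging: submissions between blk_start_plug() and blk_finish_plug() are batched per task and flushed when the plug is finished (or when the task sleeps). A hedged sketch of the same pattern in isolation, with an illustrative function name (my_write_buffers() and its arguments are made up; blk_start_plug(), blk_finish_plug(), write_dirty_buffer() and WRITE_SYNC are real interfaces):

	/* Illustrative sketch of the on-stack plugging idiom used above. */
	static void my_write_buffers(struct buffer_head **bhs, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* start batching this task's I/O */
		for (i = 0; i < nr; i++)
			write_dirty_buffer(bhs[i], WRITE_SYNC);
		blk_finish_plug(&plug);		/* flush the batch to the driver */
	}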
@@ -905,7 +904,6 @@ try_again:
 
 		bh->b_state = 0;
 		atomic_set(&bh->b_count, 0);
-		bh->b_private = NULL;
 		bh->b_size = size;
 
 		/* Link the buffer to its page */
@@ -1145,7 +1143,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
@@ -1271,12 +1269,10 @@ static inline void check_irqs_on(void)
 static void bh_lru_install(struct buffer_head *bh)
 {
 	struct buffer_head *evictee = NULL;
-	struct bh_lru *lru;
 
 	check_irqs_on();
 	bh_lru_lock();
-	lru = &__get_cpu_var(bh_lrus);
-	if (lru->bhs[0] != bh) {
+	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
 		struct buffer_head *bhs[BH_LRU_SIZE];
 		int in;
 		int out = 0;
@@ -1284,7 +1280,8 @@ static void bh_lru_install(struct buffer_head *bh)
 		get_bh(bh);
 		bhs[out++] = bh;
 		for (in = 0; in < BH_LRU_SIZE; in++) {
-			struct buffer_head *bh2 = lru->bhs[in];
+			struct buffer_head *bh2 =
+				__this_cpu_read(bh_lrus.bhs[in]);
 
 			if (bh2 == bh) {
 				__brelse(bh2);
@@ -1299,7 +1296,7 @@ static void bh_lru_install(struct buffer_head *bh)
 		}
 		while (out < BH_LRU_SIZE)
 			bhs[out++] = NULL;
-		memcpy(lru->bhs, bhs, sizeof(bhs));
+		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
 	}
 	bh_lru_unlock();
 
@@ -1314,23 +1311,22 @@ static struct buffer_head *
 lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 {
 	struct buffer_head *ret = NULL;
-	struct bh_lru *lru;
 	unsigned int i;
 
 	check_irqs_on();
 	bh_lru_lock();
-	lru = &__get_cpu_var(bh_lrus);
 	for (i = 0; i < BH_LRU_SIZE; i++) {
-		struct buffer_head *bh = lru->bhs[i];
+		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
 
 		if (bh && bh->b_bdev == bdev &&
 				bh->b_blocknr == block && bh->b_size == size) {
 			if (i) {
 				while (i) {
-					lru->bhs[i] = lru->bhs[i - 1];
+					__this_cpu_write(bh_lrus.bhs[i],
+						__this_cpu_read(bh_lrus.bhs[i - 1]));
 					i--;
 				}
-				lru->bhs[0] = bh;
+				__this_cpu_write(bh_lrus.bhs[0], bh);
 			}
 			get_bh(bh);
 			ret = bh;
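
The conversions above drop the cached struct bh_lru pointer from __get_cpu_var() in favour of this_cpu operations, which fold the per-CPU address calculation into each access; the caller still has to keep the task from migrating (bh_lru_lock() does that here). A hedged sketch of the idiom on a made-up per-CPU structure (my_cache, my_cache_peek and my_cache_set are illustrative; DEFINE_PER_CPU, __this_cpu_read and __this_cpu_write are the real interfaces):

	/* Illustrative sketch: per-CPU slots accessed with this_cpu ops.
	 * Callers must disable preemption (or otherwise pin the CPU) around
	 * these helpers, as bh_lru_lock() does in the code above.
	 */
	struct my_cache {
		struct buffer_head *slot[8];
	};
	static DEFINE_PER_CPU(struct my_cache, my_cache);

	static struct buffer_head *my_cache_peek(int i)
	{
		return __this_cpu_read(my_cache.slot[i]);
	}

	static void my_cache_set(int i, struct buffer_head *bh)
	{
		__this_cpu_write(my_cache.slot[i], bh);
	}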
@@ -1617,14 +1613,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * prevents this contention from occurring.
  *
  * If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
- * causes the writes to be flagged as synchronous writes, but the
- * block device queue will NOT be unplugged, since usually many pages
- * will be pushed to the out before the higher-level caller actually
- * waits for the writes to be completed.  The various wait functions,
- * such as wait_on_writeback_range() will ultimately call sync_page()
- * which will ultimately call blk_run_backing_dev(), which will end up
- * unplugging the device queue.
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * causes the writes to be flagged as synchronous writes.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
 			get_block_t *get_block, struct writeback_control *wbc,
@@ -1637,7 +1627,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE);
+			WRITE_SYNC : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -1706,7 +1696,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		 * and kswapd activity, but those code paths have their own
 		 * higher-level throttling.
 		 */
-		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+		if (wbc->sync_mode != WB_SYNC_NONE) {
 			lock_buffer(bh);
 		} else if (!trylock_buffer(bh)) {
 			redirty_page_for_writepage(wbc, page);
@@ -1834,9 +1824,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block)
 {
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
@@ -1910,13 +1902,11 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err)) {
+	if (unlikely(err))
 		page_zero_new_buffers(page, from, to);
-		ClearPageUptodate(page);
-	}
 	return err;
 }
-EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(__block_write_begin);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
@@ -1953,15 +1943,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
-int __block_write_begin(struct page *page, loff_t pos, unsigned len,
-		get_block_t *get_block)
-{
-	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
-
-	return block_prepare_write(page, start, start + len, get_block);
-}
-EXPORT_SYMBOL(__block_write_begin);
-
 /*
  * block_write_begin takes care of the basic task of block allocation and
  * bringing partial write blocks uptodate first.
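
For out-of-tree callers, the net effect of the two hunks above is that block_prepare_write(page, from, to, get_block) is gone and __block_write_begin(page, pos, len, get_block) is the exported primitive; it derives the in-page from/to range from the file position itself. A hedged sketch of a caller-side conversion (my_prepare_page() is an illustrative wrapper, not part of this patch):

	/* Illustrative sketch: converting a block_prepare_write() caller. */
	static int my_prepare_page(struct page *page, loff_t pos, unsigned len,
				   get_block_t *get_block)
	{
		/* Previously:
		 *	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
		 *	return block_prepare_write(page, from, from + len, get_block);
		 * __block_write_begin() now does the offset math internally.
		 */
		return __block_write_begin(page, pos, len, get_block);
	}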
@@ -2353,24 +2334,26 @@ EXPORT_SYMBOL(block_commit_write);
  * page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
+ *
+ * Direct callers of this function should call vfs_check_frozen() so that page
+ * fault does not busyloop until the fs is thawed.
  */
-int
-block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
-		   get_block_t get_block)
+int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+			 get_block_t get_block)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
+	int ret;
 
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
 	    (page_offset(page) > size)) {
-		/* page got truncated out from underneath us */
-		unlock_page(page);
-		goto out;
+		/* We overload EFAULT to mean page got truncated */
+		ret = -EFAULT;
+		goto out_unlock;
 	}
 
 	/* page is wholly or partially inside EOF */
@@ -2379,22 +2362,46 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	else
 		end = PAGE_CACHE_SIZE;
 
-	ret = block_prepare_write(page, 0, end, get_block);
+	ret = __block_write_begin(page, 0, end, get_block);
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
-	if (unlikely(ret)) {
-		unlock_page(page);
-		if (ret == -ENOMEM)
-			ret = VM_FAULT_OOM;
-		else /* -ENOSPC, -EIO, etc */
-			ret = VM_FAULT_SIGBUS;
-	} else
-		ret = VM_FAULT_LOCKED;
-
-out:
+	if (unlikely(ret < 0))
+		goto out_unlock;
+	/*
+	 * Freezing in progress? We check after the page is marked dirty and
+	 * with page lock held so if the test here fails, we are sure freezing
+	 * code will wait during syncing until the page fault is done - at that
+	 * point page will be dirty and unlocked so freezing code will write it
+	 * and writeprotect it again.
+	 */
+	set_page_dirty(page);
+	if (inode->i_sb->s_frozen != SB_UNFROZEN) {
+		ret = -EAGAIN;
+		goto out_unlock;
+	}
+	wait_on_page_writeback(page);
+	return 0;
+out_unlock:
+	unlock_page(page);
 	return ret;
 }
+EXPORT_SYMBOL(__block_page_mkwrite);
+
+int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
+		   get_block_t get_block)
+{
+	int ret;
+	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
+
+	/*
+	 * This check is racy but catches the common case. The check in
+	 * __block_page_mkwrite() is reliable.
+	 */
+	vfs_check_frozen(sb, SB_FREEZE_WRITE);
+	ret = __block_page_mkwrite(vma, vmf, get_block);
+	return block_page_mkwrite_return(ret);
+}
 EXPORT_SYMBOL(block_page_mkwrite);
 
 /*
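
After this split, __block_page_mkwrite() returns 0 or a -Exxx error (leaving the page locked and dirty on success), and the block_page_mkwrite() wrapper adds the racy vfs_check_frozen() check plus the translation to VM_FAULT_* codes via block_page_mkwrite_return(). A hedged sketch of how a filesystem's ->page_mkwrite handler might call the new helper directly, mirroring the wrapper above (myfs_page_mkwrite() and myfs_get_block() are illustrative names):

	/* Illustrative sketch: a ->page_mkwrite built on __block_page_mkwrite(). */
	static int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
		int ret;

		/* racy check so the page fault does not busyloop on a frozen fs */
		vfs_check_frozen(sb, SB_FREEZE_WRITE);
		ret = __block_page_mkwrite(vma, vmf, myfs_get_block);
		/* 0 / -EFAULT / -EAGAIN / -ENOMEM / others -> VM_FAULT_* */
		return block_page_mkwrite_return(ret);
	}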
@@ -2466,11 +2473,10 @@ int nobh_write_begin(struct address_space *mapping,
 	*fsdata = NULL;
 
 	if (page_has_buffers(page)) {
-		unlock_page(page);
-		page_cache_release(page);
-		*pagep = NULL;
-		return block_write_begin(mapping, pos, len, flags, pagep,
-					 get_block);
+		ret = __block_write_begin(page, pos, len, get_block);
+		if (unlikely(ret))
+			goto out_release;
+		return ret;
 	}
 
 	if (PageMappedToDisk(page))
@@ -2891,7 +2897,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 
 	if (err == -EOPNOTSUPP) {
 		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
 	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
@@ -3031,10 +3036,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 		bh->b_end_io = end_buffer_write_sync;
 		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
-		if (buffer_eopnotsupp(bh)) {
-			clear_buffer_eopnotsupp(bh);
-			ret = -EOPNOTSUPP;
-		}
 		if (!ret && !buffer_uptodate(bh))
 			ret = -EIO;
 	} else {
@@ -3154,17 +3155,6 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-void block_sync_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	smp_mb();
-	mapping = page_mapping(page);
-	if (mapping)
-		blk_run_backing_dev(mapping->backing_dev_info, page);
-}
-EXPORT_SYMBOL(block_sync_page);
-
 /*
  * There are no bdflush tunables left.  But distributions are
  * still running obsolete flush daemons, so we terminate them here.
@@ -3217,22 +3207,23 @@ static void recalc_bh_state(void)
 	int i;
 	int tot = 0;
 
-	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
+	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
 		return;
-	__get_cpu_var(bh_accounting).ratelimit = 0;
+	__this_cpu_write(bh_accounting.ratelimit, 0);
 	for_each_online_cpu(i)
 		tot += per_cpu(bh_accounting, i).nr;
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
 
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
 	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
-		get_cpu_var(bh_accounting).nr++;
+		preempt_disable();
+		__this_cpu_inc(bh_accounting.nr);
 		recalc_bh_state();
-		put_cpu_var(bh_accounting);
+		preempt_enable();
 	}
 	return ret;
 }
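
get_cpu_var()/put_cpu_var() disable preemption and take the per-CPU address; the replacement keeps an explicit preempt_disable()/preempt_enable() pair only because recalc_bh_state() must run on the same CPU as the counter update, and it uses __this_cpu_inc()/__this_cpu_dec() for the update itself. A hedged before/after sketch on a made-up counter (my_counter and my_count_event are illustrative; the per-CPU primitives are real):

	/* Illustrative sketch: updating a per-CPU counter with this_cpu ops. */
	static DEFINE_PER_CPU(unsigned long, my_counter);

	static void my_count_event(void)
	{
		/* Old style:
		 *	get_cpu_var(my_counter)++;
		 *	put_cpu_var(my_counter);
		 */
		preempt_disable();	/* stay on one CPU, as alloc_buffer_head() does */
		__this_cpu_inc(my_counter);
		/* ... any follow-up work that must run on this same CPU ... */
		preempt_enable();
	}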
@@ -3242,9 +3233,10 @@ void free_buffer_head(struct buffer_head *bh)
 {
 	BUG_ON(!list_empty(&bh->b_assoc_buffers));
 	kmem_cache_free(bh_cachep, bh);
-	get_cpu_var(bh_accounting).nr--;
+	preempt_disable();
+	__this_cpu_dec(bh_accounting.nr);
 	recalc_bh_state();
-	put_cpu_var(bh_accounting);
+	preempt_enable();
 }
 EXPORT_SYMBOL(free_buffer_head);
 
@@ -3257,9 +3249,8 @@ static void buffer_exit_cpu(int cpu)
 		brelse(b->bhs[i]);
 		b->bhs[i] = NULL;
 	}
-	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
 	per_cpu(bh_accounting, cpu).nr = 0;
-	put_cpu_var(bh_accounting);
 }
 
 static int buffer_cpu_notify(struct notifier_block *self,