Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 202
1 files changed, 72 insertions, 130 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index a9b399402007..23f1f3a68077 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -160,12 +160,7 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
+static void __fsync_super(struct super_block *sb)
 {
 	sync_inodes_sb(sb, 0);
 	DQUOT_SYNC(sb);
@@ -177,7 +172,16 @@ int fsync_super(struct super_block *sb)
 	sb->s_op->sync_fs(sb, 1);
 	sync_blockdev(sb->s_bdev);
 	sync_inodes_sb(sb, 1);
+}
 
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock. Filesystem data as well as the underlying block
+ * device. Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	__fsync_super(sb);
 	return sync_blockdev(sb->s_bdev);
 }
 
@@ -201,7 +205,7 @@ int fsync_bdev(struct block_device *bdev)
  * freeze_bdev -- lock a filesystem and force it into a consistent state
  * @bdev: blockdevice to lock
  *
- * This takes the block device bd_mount_sem to make sure no new mounts
+ * This takes the block device bd_mount_mutex to make sure no new mounts
  * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
@@ -210,25 +214,13 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 {
 	struct super_block *sb;
 
-	down(&bdev->bd_mount_sem);
+	mutex_lock(&bdev->bd_mount_mutex);
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		sync_inodes_sb(sb, 0);
-		DQUOT_SYNC(sb);
-
-		lock_super(sb);
-		if (sb->s_dirt && sb->s_op->write_super)
-			sb->s_op->write_super(sb);
-		unlock_super(sb);
-
-		if (sb->s_op->sync_fs)
-			sb->s_op->sync_fs(sb, 1);
-
-		sync_blockdev(sb->s_bdev);
-		sync_inodes_sb(sb, 1);
+		__fsync_super(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();
@@ -264,7 +256,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		drop_super(sb);
 	}
 
-	up(&bdev->bd_mount_sem);
+	mutex_unlock(&bdev->bd_mount_mutex);
 }
 EXPORT_SYMBOL(thaw_bdev);
 
@@ -327,31 +319,24 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-static long do_fsync(unsigned int fd, int datasync)
+long do_fsync(struct file *file, int datasync)
 {
-	struct file * file;
-	struct address_space *mapping;
-	int ret, err;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
+	int ret;
+	int err;
+	struct address_space *mapping = file->f_mapping;
 
-	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why? We can still call filemap_fdatawrite */
-		goto out_putf;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	mapping = file->f_mapping;
-
 	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
 	/*
-	 * We need to protect against concurrent writers,
-	 * which could cause livelocks in fsync_buffers_list
+	 * We need to protect against concurrent writers, which could cause
+	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
 	err = file->f_op->fsync(file, file->f_dentry, datasync);
@@ -362,21 +347,31 @@ static long do_fsync(unsigned int fd, int datasync)
 	if (!ret)
 		ret = err;
 	current->flags &= ~PF_SYNCWRITE;
-
-out_putf:
-	fput(file);
 out:
 	return ret;
 }
 
+static long __do_fsync(unsigned int fd, int datasync)
+{
+	struct file *file;
+	int ret = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		ret = do_fsync(file, datasync);
+		fput(file);
+	}
+	return ret;
+}
+
 asmlinkage long sys_fsync(unsigned int fd)
 {
-	return do_fsync(fd, 0);
+	return __do_fsync(fd, 0);
 }
 
 asmlinkage long sys_fdatasync(unsigned int fd)
 {
-	return do_fsync(fd, 1);
+	return __do_fsync(fd, 1);
 }
 
 /*
@@ -431,8 +426,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	if (all_mapped) {
 		printk("__find_get_block_slow() failed. "
 			"block=%llu, b_blocknr=%llu\n",
-			(unsigned long long)block, (unsigned long long)bh->b_blocknr);
-		printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size);
+			(unsigned long long)block,
+			(unsigned long long)bh->b_blocknr);
+		printk("b_state=0x%08lx, b_size=%zu\n",
+			bh->b_state, bh->b_size);
 		printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits);
 	}
 out_unlock:
@@ -496,7 +493,7 @@ static void free_more_memory(void)
 	wakeup_pdflush(1024);
 	yield();
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
@@ -801,8 +798,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 	if (!mapping->assoc_mapping) {
 		mapping->assoc_mapping = buffer_mapping;
 	} else {
-		if (mapping->assoc_mapping != buffer_mapping)
-			BUG();
+		BUG_ON(mapping->assoc_mapping != buffer_mapping);
 	}
 	if (list_empty(&bh->b_assoc_buffers)) {
 		spin_lock(&buffer_mapping->private_lock);
@@ -865,8 +861,8 @@ int __set_page_dirty_buffers(struct page *page)
 		}
 		write_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+		return 1;
 	}
-
 	return 0;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1119,8 +1115,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	if (!page)
 		return NULL;
 
-	if (!PageLocked(page))
-		BUG();
+	BUG_ON(!PageLocked(page));
 
 	if (page_has_buffers(page)) {
 		bh = page_buffers(page);
@@ -1527,8 +1522,7 @@ void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
 	bh->b_page = page;
-	if (offset >= PAGE_SIZE)
-		BUG();
+	BUG_ON(offset >= PAGE_SIZE);
 	if (PageHighMem(page))
 		/*
 		 * This catches illegal uses and preserves the offset:
@@ -1598,11 +1592,10 @@ EXPORT_SYMBOL(try_to_release_page);
  * point. Because the caller is about to free (and possibly reuse) those
  * blocks on-disk.
  */
-int block_invalidatepage(struct page *page, unsigned long offset)
+void block_invalidatepage(struct page *page, unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int curr_off = 0;
-	int ret = 1;
 
 	BUG_ON(!PageLocked(page));
 	if (!page_has_buffers(page))
@@ -1629,19 +1622,18 @@ int block_invalidatepage(struct page *page, unsigned long offset)
 	 * so real IO is not possible anymore.
 	 */
 	if (offset == 0)
-		ret = try_to_release_page(page, 0);
+		try_to_release_page(page, 0);
 out:
-	return ret;
+	return;
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
-int do_invalidatepage(struct page *page, unsigned long offset)
+void do_invalidatepage(struct page *page, unsigned long offset)
 {
-	int (*invalidatepage)(struct page *, unsigned long);
-	invalidatepage = page->mapping->a_ops->invalidatepage;
-	if (invalidatepage == NULL)
-		invalidatepage = block_invalidatepage;
-	return (*invalidatepage)(page, offset);
+	void (*invalidatepage)(struct page *, unsigned long);
+	invalidatepage = page->mapping->a_ops->invalidatepage ? :
+		block_invalidatepage;
+	(*invalidatepage)(page, offset);
 }
 
 /*
@@ -1743,6 +1735,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
+	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1750,7 +1743,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
 	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, 1 << inode->i_blkbits,
+		create_empty_buffers(page, blocksize,
 			(1 << BH_Dirty)|(1 << BH_Uptodate));
 	}
 
@@ -1785,6 +1778,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				goto recover;
@@ -1938,6 +1932,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (buffer_new(bh))
 			clear_buffer_new(bh);
 		if (!buffer_mapped(bh)) {
+			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, block, bh, 1);
 			if (err)
 				break;
@@ -2093,6 +2088,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 
 		fully_mapped = 0;
 		if (iblock < lblock) {
+			WARN_ON(bh->b_size != blocksize);
 			err = get_block(inode, iblock, bh, 0);
 			if (err)
 				SetPageError(page);
@@ -2414,6 +2410,7 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 		create = 1;
 		if (block_start >= to)
 			create = 0;
+		map_bh.b_size = blocksize;
 		ret = get_block(inode, block_in_file + block_in_page,
 				&map_bh, create);
 		if (ret)
@@ -2674,6 +2671,7 @@ int block_truncate_page(struct address_space *mapping,
 
 	err = 0;
 	if (!buffer_mapped(bh)) {
+		WARN_ON(bh->b_size != blocksize);
 		err = get_block(inode, iblock, bh, 0);
 		if (err)
 			goto unlock;
@@ -2760,6 +2758,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 	struct inode *inode = mapping->host;
 	tmp.b_state = 0;
 	tmp.b_blocknr = 0;
+	tmp.b_size = 1 << inode->i_blkbits;
 	get_block(inode, block, &tmp, 0);
 	return tmp.b_blocknr;
 }
@@ -3012,7 +3011,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_free_buffers);
 
-int block_sync_page(struct page *page)
+void block_sync_page(struct page *page)
 {
 	struct address_space *mapping;
 
@@ -3020,7 +3019,6 @@ int block_sync_page(struct page *page)
 	mapping = page_mapping(page);
 	if (mapping)
 		blk_run_backing_dev(mapping->backing_dev_info, page);
-	return 0;
 }
 
 /*
@@ -3051,68 +3049,6 @@ asmlinkage long sys_bdflush(int func, long data)
 }
 
 /*
- * Migration function for pages with buffers. This function can only be used
- * if the underlying filesystem guarantees that no other references to "page"
- * exist.
- */
-#ifdef CONFIG_MIGRATION
-int buffer_migrate_page(struct page *newpage, struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-	struct buffer_head *bh, *head;
-	int rc;
-
-	if (!mapping)
-		return -EAGAIN;
-
-	if (!page_has_buffers(page))
-		return migrate_page(newpage, page);
-
-	head = page_buffers(page);
-
-	rc = migrate_page_remove_references(newpage, page, 3);
-	if (rc)
-		return rc;
-
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	ClearPagePrivate(page);
-	set_page_private(newpage, page_private(page));
-	set_page_private(page, 0);
-	put_page(page);
-	get_page(newpage);
-
-	bh = head;
-	do {
-		set_bh_page(bh, newpage, bh_offset(bh));
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	SetPagePrivate(newpage);
-
-	migrate_page_copy(newpage, page);
-
-	bh = head;
-	do {
-		unlock_buffer(bh);
-		put_bh(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
-
-	return 0;
-}
-EXPORT_SYMBOL(buffer_migrate_page);
-#endif
-
-/*
  * Buffer-head allocation
  */
 static kmem_cache_t *bh_cachep;
@@ -3140,7 +3076,7 @@ static void recalc_bh_state(void)
 	if (__get_cpu_var(bh_accounting).ratelimit++ < 4096)
 		return;
 	__get_cpu_var(bh_accounting).ratelimit = 0;
-	for_each_cpu(i)
+	for_each_online_cpu(i)
 		tot += per_cpu(bh_accounting, i).nr;
 	buffer_heads_over_limit = (tot > max_buffer_heads);
 }
@@ -3189,6 +3125,9 @@ static void buffer_exit_cpu(int cpu)
 		brelse(b->bhs[i]);
 		b->bhs[i] = NULL;
 	}
+	get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr;
+	per_cpu(bh_accounting, cpu).nr = 0;
+	put_cpu_var(bh_accounting);
 }
 
 static int buffer_cpu_notify(struct notifier_block *self,
@@ -3205,8 +3144,11 @@ void __init buffer_init(void)
 	int nrpages;
 
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
-			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
+			(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+				SLAB_MEM_SPREAD),
+			init_buffer_head,
+			NULL);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL