Diffstat (limited to 'fs')
-rw-r--r--  fs/affs/Changes                  |   2
-rw-r--r--  fs/aio.c                         | 115
-rw-r--r--  fs/btrfs/check-integrity.c       |  32
-rw-r--r--  fs/btrfs/check-integrity.h       |   2
-rw-r--r--  fs/btrfs/extent-tree.c           |  22
-rw-r--r--  fs/btrfs/extent_io.c             |  12
-rw-r--r--  fs/btrfs/ioctl.c                 |   3
-rw-r--r--  fs/btrfs/relocation.c            |  81
-rw-r--r--  fs/btrfs/scrub.c                 |  33
-rw-r--r--  fs/btrfs/send.c                  |   4
-rw-r--r--  fs/btrfs/super.c                 |   5
-rw-r--r--  fs/ceph/addr.c                   |  10
-rw-r--r--  fs/ceph/cache.c                  |   3
-rw-r--r--  fs/ceph/caps.c                   |  27
-rw-r--r--  fs/ceph/dir.c                    |  11
-rw-r--r--  fs/ceph/inode.c                  | 183
-rw-r--r--  fs/ceph/mds_client.c             |  61
-rw-r--r--  fs/ceph/mds_client.h             |   1
-rw-r--r--  fs/ceph/super.h                  |   8
-rw-r--r--  fs/cifs/cifsglob.h               |   1
-rw-r--r--  fs/cifs/ioctl.c                  |   6
-rw-r--r--  fs/cifs/smb2ops.c                |  99
-rw-r--r--  fs/cifs/smb2pdu.c                |  92
-rw-r--r--  fs/cifs/smb2pdu.h                |  12
-rw-r--r--  fs/cifs/smb2proto.h              |   1
-rw-r--r--  fs/cifs/smbfsctl.h               |   2
-rw-r--r--  fs/dcache.c                      |   2
-rw-r--r--  fs/eventpoll.c                   |   3
-rw-r--r--  fs/ext2/super.c                  |   1
-rw-r--r--  fs/ext4/ext4.h                   |  10
-rw-r--r--  fs/ext4/ext4_jbd2.c              |   9
-rw-r--r--  fs/ext4/extents.c                |  45
-rw-r--r--  fs/ext4/inode.c                  |  12
-rw-r--r--  fs/ext4/mballoc.c                |  17
-rw-r--r--  fs/ext4/super.c                  |  21
-rw-r--r--  fs/hfsplus/wrapper.c             |  17
-rw-r--r--  fs/jbd2/journal.c                |  18
-rw-r--r--  fs/jbd2/recovery.c               |   2
-rw-r--r--  fs/jbd2/transaction.c            |  16
-rw-r--r--  fs/logfs/dev_bdev.c              |  13
-rw-r--r--  fs/namei.c                       |  10
-rw-r--r--  fs/nfs/blocklayout/blocklayout.h |   1
-rw-r--r--  fs/nfs/blocklayout/extents.c     |   2
-rw-r--r--  fs/nfs/dns_resolve.c             |   2
-rw-r--r--  fs/nfs/inode.c                   |   2
-rw-r--r--  fs/nfs/internal.h                |  15
-rw-r--r--  fs/nfs/nfs4_fs.h                 |   8
-rw-r--r--  fs/nfs/nfs4proc.c                |  30
-rw-r--r--  fs/nfsd/nfscache.c               |   9
-rw-r--r--  fs/pipe.c                        |  39
-rw-r--r--  fs/proc/inode.c                  |  14
-rw-r--r--  fs/pstore/platform.c             |   7
-rw-r--r--  fs/squashfs/file_direct.c        |   5
-rw-r--r--  fs/sysfs/file.c                  |  18
-rw-r--r--  fs/xfs/xfs_bmap.c                |  32
-rw-r--r--  fs/xfs/xfs_bmap_util.c           |  14
-rw-r--r--  fs/xfs/xfs_buf.c                 |  37
-rw-r--r--  fs/xfs/xfs_buf.h                 |  11
-rw-r--r--  fs/xfs/xfs_buf_item.c            |  21
-rw-r--r--  fs/xfs/xfs_dir2_node.c           |  26
-rw-r--r--  fs/xfs/xfs_discard.c             |   5
-rw-r--r--  fs/xfs/xfs_fsops.c               |   6
-rw-r--r--  fs/xfs/xfs_ioctl.c               |   3
-rw-r--r--  fs/xfs/xfs_ioctl32.c             |   3
-rw-r--r--  fs/xfs/xfs_iops.c                |   3
-rw-r--r--  fs/xfs/xfs_log_recover.c         |  13
-rw-r--r--  fs/xfs/xfs_qm.c                  |  80
-rw-r--r--  fs/xfs/xfs_trans_buf.c           |  13
68 files changed, 931 insertions, 512 deletions
diff --git a/fs/affs/Changes b/fs/affs/Changes
index a29409c1ffe0..b41c2c9792ff 100644
--- a/fs/affs/Changes
+++ b/fs/affs/Changes
@@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel]
 Version 3.11
 ------------
 
-- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>]
+- Converted to use 2.3.x page cache [Dave Jones]
 - Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>]
 
 Version 3.10
diff --git a/fs/aio.c b/fs/aio.c
index 08159ed13649..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
	int i;
 
	for (i = 0; i < ctx->nr_pages; i++) {
+		struct page *page;
		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
			 page_count(ctx->ring_pages[i]));
-		put_page(ctx->ring_pages[i]);
+		page = ctx->ring_pages[i];
+		if (!page)
+			continue;
+		ctx->ring_pages[i] = NULL;
+		put_page(page);
	}
 
	put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
	unsigned long flags;
	int rc;
 
+	rc = 0;
+
+	/* Make sure the old page hasn't already been changed */
+	spin_lock(&mapping->private_lock);
+	ctx = mapping->private_data;
+	if (ctx) {
+		pgoff_t idx;
+		spin_lock_irqsave(&ctx->completion_lock, flags);
+		idx = old->index;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			if (ctx->ring_pages[idx] != old)
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	} else
+		rc = -EINVAL;
+	spin_unlock(&mapping->private_lock);
+
+	if (rc != 0)
+		return rc;
+
	/* Writeback must be complete */
	BUG_ON(PageWriteback(old));
-	put_page(old);
+	get_page(new);
 
-	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
	if (rc != MIGRATEPAGE_SUCCESS) {
-		get_page(old);
+		put_page(new);
		return rc;
	}
 
-	get_page(new);
-
	/* We can potentially race against kioctx teardown here. Use the
	 * address_space's private data lock to protect the mapping's
	 * private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
		spin_lock_irqsave(&ctx->completion_lock, flags);
		migrate_page_copy(new, old);
		idx = old->index;
-		if (idx < (pgoff_t)ctx->nr_pages)
-			ctx->ring_pages[idx] = new;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			/* And only do the move if things haven't changed */
+			if (ctx->ring_pages[idx] == old)
+				ctx->ring_pages[idx] = new;
+			else
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
		spin_unlock_irqrestore(&ctx->completion_lock, flags);
	} else
		rc = -EBUSY;
	spin_unlock(&mapping->private_lock);
 
+	if (rc == MIGRATEPAGE_SUCCESS)
+		put_page(old);
+	else
+		put_page(new);
+
	return rc;
 }
 #endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
	struct aio_ring *ring;
	unsigned nr_events = ctx->max_reqs;
	struct mm_struct *mm = current->mm;
-	unsigned long size, populate;
+	unsigned long size, unused;
	int nr_pages;
	int i;
	struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
		return -EAGAIN;
	}
 
+	ctx->aio_ring_file = file;
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+			/ sizeof(struct io_event);
+
+	ctx->ring_pages = ctx->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!ctx->ring_pages) {
+			put_aio_ring_file(ctx);
+			return -ENOMEM;
+		}
+	}
+
	for (i = 0; i < nr_pages; i++) {
		struct page *page;
		page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,17 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
		SetPageUptodate(page);
		SetPageDirty(page);
		unlock_page(page);
+
+		ctx->ring_pages[i] = page;
	}
-	ctx->aio_ring_file = file;
-	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-			/ sizeof(struct io_event);
+	ctx->nr_pages = i;
 
-	ctx->ring_pages = ctx->internal_pages;
-	if (nr_pages > AIO_RING_PAGES) {
-		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!ctx->ring_pages)
-			return -ENOMEM;
+	if (unlikely(i != nr_pages)) {
+		aio_free_ring(ctx);
+		return -EAGAIN;
	}
 
	ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -377,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
	down_write(&mm->mmap_sem);
	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
				       PROT_READ | PROT_WRITE,
-				       MAP_SHARED | MAP_POPULATE, 0, &populate);
+				       MAP_SHARED, 0, &unused);
+	up_write(&mm->mmap_sem);
	if (IS_ERR((void *)ctx->mmap_base)) {
-		up_write(&mm->mmap_sem);
		ctx->mmap_size = 0;
		aio_free_ring(ctx);
		return -EAGAIN;
@@ -387,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 
	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
 
-	/* We must do this while still holding mmap_sem for write, as we
-	 * need to be protected against userspace attempting to mremap()
-	 * or munmap() the ring buffer.
-	 */
-	ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-				       1, 0, ctx->ring_pages, NULL);
-
-	/* Dropping the reference here is safe as the page cache will hold
-	 * onto the pages for us. It is also required so that page migration
-	 * can unmap the pages and get the right reference count.
-	 */
-	for (i = 0; i < ctx->nr_pages; i++)
-		put_page(ctx->ring_pages[i]);
-
-	up_write(&mm->mmap_sem);
-
-	if (unlikely(ctx->nr_pages != nr_pages)) {
-		aio_free_ring(ctx);
-		return -EAGAIN;
-	}
-
	ctx->user_id = ctx->mmap_base;
	ctx->nr_events = nr_events; /* trusted copy */
 
@@ -645,12 +671,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
	    aio_nr + nr_events < aio_nr) {
		spin_unlock(&aio_nr_lock);
		err = -EAGAIN;
-		goto err;
+		goto err_ctx;
	}
	aio_nr += ctx->max_reqs;
	spin_unlock(&aio_nr_lock);
 
	percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+	percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */
 
	err = ioctx_add_table(ctx, mm);
	if (err)
@@ -662,6 +689,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 err_cleanup:
	aio_nr_sub(ctx->max_reqs);
+err_ctx:
+	aio_free_ring(ctx);
 err:
	free_percpu(ctx->cpu);
	free_percpu(ctx->reqs.pcpu_count);
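Note on the aio_migratepage() ordering above: the fix pins the replacement page before the expensive move, verifies the ring slot still points at the old page both before and after migrate_page_move_mapping(), and only then decides which reference to drop. A minimal userspace analogue of that pin/re-check/swap discipline (illustrative only: obj, slot and replace_slot are invented names, refcounting is simplified, and the real code additionally nests completion_lock inside private_lock):

	#include <pthread.h>
	#include <stdbool.h>

	struct obj {
		int refcount;
	};

	static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct obj *slot;

	/* Simplified: assume refcounts are otherwise synchronized. */
	static void obj_get(struct obj *o) { o->refcount++; }
	static void obj_put(struct obj *o) { o->refcount--; }

	/* Returns true if new now occupies the slot, false if the slot
	 * no longer held old (a concurrent teardown/replacement won). */
	static bool replace_slot(struct obj *old, struct obj *new)
	{
		bool won;

		obj_get(new);			/* pin the replacement up front */

		pthread_mutex_lock(&slot_lock);
		won = (slot == old);		/* re-check under the lock */
		if (won)
			slot = new;
		pthread_mutex_unlock(&slot_lock);

		if (won)
			obj_put(old);		/* the slot's ref moved to new */
		else
			obj_put(new);		/* lost the race: undo our pin */
		return won;
	}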
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b50764bef141..131d82800b3a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -333,7 +333,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
 static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx);
 static void btrfsic_dump_database(struct btrfsic_state *state);
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages);
 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
@@ -1687,7 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;
-		DECLARE_COMPLETION_ONSTACK(complete);
 
		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
@@ -1698,8 +1696,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_sector = dev_bytenr >> 9;
-		bio->bi_end_io = btrfsic_complete_bio_end_io;
-		bio->bi_private = &complete;
 
		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1712,12 +1708,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
			       "btrfsic: error, failed to add a single page!\n");
			return -1;
		}
-		submit_bio(READ, bio);
-
-		/* this will also unplug the queue */
-		wait_for_completion(&complete);
-
-		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		if (submit_bio_wait(READ, bio)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       block_ctx->start, block_ctx->dev->name);
@@ -1740,11 +1731,6 @@ static int btrfsic_read_block(struct btrfsic_state *state,
	return block_ctx->len;
 }
 
-static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
-{
-	complete((struct completion *)bio->bi_private);
-}
-
 static void btrfsic_dump_database(struct btrfsic_state *state)
 {
	struct list_head *elem_all;
@@ -3008,14 +2994,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
	return submit_bh(rw, bh);
 }
 
-void btrfsic_submit_bio(int rw, struct bio *bio)
+static void __btrfsic_submit_bio(int rw, struct bio *bio)
 {
	struct btrfsic_dev_state *dev_state;
 
-	if (!btrfsic_is_initialized) {
-		submit_bio(rw, bio);
+	if (!btrfsic_is_initialized)
		return;
-	}
 
	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
@@ -3106,10 +3090,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
	}
 leave:
	mutex_unlock(&btrfsic_mutex);
+}
 
+void btrfsic_submit_bio(int rw, struct bio *bio)
+{
+	__btrfsic_submit_bio(rw, bio);
	submit_bio(rw, bio);
 }
 
+int btrfsic_submit_bio_wait(int rw, struct bio *bio)
+{
+	__btrfsic_submit_bio(rw, bio);
+	return submit_bio_wait(rw, bio);
+}
+
 int btrfsic_mount(struct btrfs_root *root,
		  struct btrfs_fs_devices *fs_devices,
		  int including_extent_data, u32 print_mask)
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 8b59175cc502..13b8566c97ab 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -22,9 +22,11 @@
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 int btrfsic_submit_bh(int rw, struct buffer_head *bh);
 void btrfsic_submit_bio(int rw, struct bio *bio);
+int btrfsic_submit_bio_wait(int rw, struct bio *bio);
 #else
 #define btrfsic_submit_bh submit_bh
 #define btrfsic_submit_bio submit_bio
+#define btrfsic_submit_bio_wait submit_bio_wait
 #endif
 
 int btrfsic_mount(struct btrfs_root *root,
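The conversions in this commit replace the open-coded DECLARE_COMPLETION_ONSTACK()/wait_for_completion()/BIO_UPTODATE pattern with submit_bio_wait(), which performs the same synchronous submit internally and returns a nonzero error instead of requiring callers to test the bio flags. A sketch of what such a helper looks like under the 3.13-era bio API (the struct and endio names here are illustrative):

	/* Synchronous-submit helper, modeled on submit_bio_wait(). */
	struct submit_bio_ret {
		struct completion event;
		int error;
	};

	static void submit_bio_wait_endio(struct bio *bio, int error)
	{
		struct submit_bio_ret *ret = bio->bi_private;

		ret->error = error;
		complete(&ret->event);
	}

	static int submit_bio_wait_sketch(int rw, struct bio *bio)
	{
		struct submit_bio_ret ret;

		rw |= REQ_SYNC;		/* hint: someone is waiting on this */
		init_completion(&ret.event);
		bio->bi_private = &ret;
		bio->bi_end_io = submit_bio_wait_endio;
		submit_bio(rw, bio);
		wait_for_completion(&ret.event);

		return ret.error;	/* 0 on success, negative errno on failure */
	}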
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45d98d01028f..9c01509dd8ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -767,20 +767,19 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
	if (!path)
		return -ENOMEM;
 
-	if (metadata) {
-		key.objectid = bytenr;
-		key.type = BTRFS_METADATA_ITEM_KEY;
-		key.offset = offset;
-	} else {
-		key.objectid = bytenr;
-		key.type = BTRFS_EXTENT_ITEM_KEY;
-		key.offset = offset;
-	}
-
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}
+
+search_again:
+	key.objectid = bytenr;
+	key.offset = offset;
+	if (metadata)
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	else
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+
 again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
@@ -788,7 +787,6 @@ again:
		goto out_free;
 
	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
-		metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
@@ -855,7 +853,7 @@ again:
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
-			goto again;
+			goto search_again;
		}
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8e457fca0a0b..ff43802a7c88 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
	return err;
 }
 
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
-	complete(bio->bi_private);
-}
-
 /*
  * this bypasses the standard btrfs submit functions deliberately, as
  * the standard behavior is to write all copies in a raid setup. here we only
@@ -1973,7 +1968,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 {
	struct bio *bio;
	struct btrfs_device *dev;
-	DECLARE_COMPLETION_ONSTACK(compl);
	u64 map_length = 0;
	u64 sector;
	struct btrfs_bio *bbio = NULL;
@@ -1990,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
	if (!bio)
		return -EIO;
-	bio->bi_private = &compl;
-	bio->bi_end_io = repair_io_failure_callback;
	bio->bi_size = 0;
	map_length = length;
 
@@ -2012,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
	}
	bio->bi_bdev = dev->bdev;
	bio_add_page(bio, page, length, start - page_offset(page));
-	btrfsic_submit_bio(WRITE_SYNC, bio);
-	wait_for_completion(&compl);
 
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+	if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
		/* try to remap that extent elsewhere? */
		bio_put(bio);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a111622598b0..21da5762b0b1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2121,7 +2121,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 
	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
	if (err == -EINTR)
-		goto out;
+		goto out_drop_write;
	dentry = lookup_one_len(vol_args->name, parent, namelen);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
@@ -2284,6 +2284,7 @@ out_dput:
	dput(dentry);
 out_unlock_dir:
	mutex_unlock(&dir->i_mutex);
+out_drop_write:
	mnt_drop_write_file(file);
 out:
	kfree(vol_args);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ce459a7cb16d..429c73c374b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -571,7 +571,9 @@ static int is_cowonly_root(u64 root_objectid)
	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
-	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
+	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
+	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
		return 1;
	return 0;
 }
@@ -1264,10 +1266,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
 }
 
 /*
- * helper to update/delete the 'address of tree root -> reloc tree'
+ * helper to delete the 'address of tree root -> reloc tree'
  * mapping
  */
-static int __update_reloc_root(struct btrfs_root *root, int del)
+static void __del_reloc_root(struct btrfs_root *root)
 {
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
@@ -1275,7 +1277,7 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
 
	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
-			      root->commit_root->start);
+			      root->node->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1283,23 +1285,45 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
	spin_unlock(&rc->reloc_root_tree.lock);
 
	if (!node)
-		return 0;
+		return;
	BUG_ON((struct btrfs_root *)node->data != root);
 
-	if (!del) {
-		spin_lock(&rc->reloc_root_tree.lock);
-		node->bytenr = root->node->start;
-		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
-				      node->bytenr, &node->rb_node);
-		spin_unlock(&rc->reloc_root_tree.lock);
-		if (rb_node)
-			backref_tree_panic(rb_node, -EEXIST, node->bytenr);
-	} else {
-		spin_lock(&root->fs_info->trans_lock);
-		list_del_init(&root->root_list);
-		spin_unlock(&root->fs_info->trans_lock);
-		kfree(node);
+	spin_lock(&root->fs_info->trans_lock);
+	list_del_init(&root->root_list);
+	spin_unlock(&root->fs_info->trans_lock);
+	kfree(node);
+}
+
+/*
+ * helper to update the 'address of tree root -> reloc tree'
+ * mapping
+ */
+static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node = NULL;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+			      root->node->start);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+
+	if (!node)
+		return 0;
+	BUG_ON((struct btrfs_root *)node->data != root);
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	node->bytenr = new_bytenr;
+	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+			      node->bytenr, &node->rb_node);
+	spin_unlock(&rc->reloc_root_tree.lock);
+	if (rb_node)
+		backref_tree_panic(rb_node, -EEXIST, node->bytenr);
	return 0;
 }
 
@@ -1420,7 +1444,6 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 {
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
-	int del = 0;
	int ret;
 
	if (!root->reloc_root)
@@ -1432,11 +1455,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
	if (root->fs_info->reloc_ctl->merge_reloc_tree &&
	    btrfs_root_refs(root_item) == 0) {
		root->reloc_root = NULL;
-		del = 1;
+		__del_reloc_root(reloc_root);
	}
 
-	__update_reloc_root(reloc_root, del);
-
	if (reloc_root->commit_root != reloc_root->node) {
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
@@ -2287,7 +2308,7 @@ void free_reloc_roots(struct list_head *list)
	while (!list_empty(list)) {
		reloc_root = list_entry(list->next, struct btrfs_root,
					root_list);
-		__update_reloc_root(reloc_root, 1);
+		__del_reloc_root(reloc_root);
		free_extent_buffer(reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		kfree(reloc_root);
@@ -2332,7 +2353,7 @@ again:
 
		ret = merge_reloc_root(rc, root);
		if (ret) {
-			__update_reloc_root(reloc_root, 1);
+			__del_reloc_root(reloc_root);
			free_extent_buffer(reloc_root->node);
			free_extent_buffer(reloc_root->commit_root);
			kfree(reloc_root);
@@ -2388,6 +2409,13 @@ out:
		btrfs_std_error(root->fs_info, ret);
		if (!list_empty(&reloc_roots))
			free_reloc_roots(&reloc_roots);
+
+		/* new reloc root may be added */
+		mutex_lock(&root->fs_info->reloc_mutex);
+		list_splice_init(&rc->reloc_roots, &reloc_roots);
+		mutex_unlock(&root->fs_info->reloc_mutex);
+		if (!list_empty(&reloc_roots))
+			free_reloc_roots(&reloc_roots);
	}
 
	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
@@ -4522,6 +4550,11 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
 
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (buf == root->node)
+			__update_reloc_root(root, cow->start);
+	}
+
	level = btrfs_header_level(buf);
	if (btrfs_header_generation(buf) <=
	    btrfs_root_last_snapshot(&root->root_item))
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 561e2f16ba3e..1fd3f33c330a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
-static void scrub_complete_bio_end_io(struct bio *bio, int err);
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
@@ -1294,7 +1293,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];
-		DECLARE_COMPLETION_ONSTACK(complete);
 
		if (page->dev->bdev == NULL) {
			page->io_error = 1;
@@ -1311,18 +1309,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
		}
		bio->bi_bdev = page->dev->bdev;
		bio->bi_sector = page->physical >> 9;
-		bio->bi_end_io = scrub_complete_bio_end_io;
-		bio->bi_private = &complete;
 
		bio_add_page(bio, page->page, PAGE_SIZE, 0);
-		btrfsic_submit_bio(READ, bio);
-
-		/* this will also unplug the queue */
-		wait_for_completion(&complete);
-
-		page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
-		if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		if (btrfsic_submit_bio_wait(READ, bio))
			sblock->no_io_error_seen = 0;
+
		bio_put(bio);
	}
 
@@ -1391,11 +1382,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
		sblock->checksum_error = 1;
 }
 
-static void scrub_complete_bio_end_io(struct bio *bio, int err)
-{
-	complete((struct completion *)bio->bi_private);
-}
-
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write)
@@ -1430,7 +1416,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;
-		DECLARE_COMPLETION_ONSTACK(complete);
 
		if (!page_bad->dev->bdev) {
			printk_ratelimited(KERN_WARNING
@@ -1443,19 +1428,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
			return -EIO;
		bio->bi_bdev = page_bad->dev->bdev;
		bio->bi_sector = page_bad->physical >> 9;
-		bio->bi_end_io = scrub_complete_bio_end_io;
-		bio->bi_private = &complete;
 
		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}
-		btrfsic_submit_bio(WRITE, bio);
 
-		/* this will also unplug the queue */
-		wait_for_completion(&complete);
-		if (!bio_flagged(bio, BIO_UPTODATE)) {
+		if (btrfsic_submit_bio_wait(WRITE, bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			btrfs_dev_replace_stats_inc(
@@ -3375,7 +3355,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
	struct bio *bio;
	struct btrfs_device *dev;
	int ret;
-	DECLARE_COMPLETION_ONSTACK(compl);
 
	dev = sctx->wr_ctx.tgtdev;
	if (!dev)
@@ -3392,8 +3371,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}
-	bio->bi_private = &compl;
-	bio->bi_end_io = scrub_complete_bio_end_io;
	bio->bi_size = 0;
	bio->bi_sector = physical_for_dev_replace >> 9;
	bio->bi_bdev = dev->bdev;
@@ -3404,10 +3381,8 @@ leave_with_eio:
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
		return -EIO;
	}
-	btrfsic_submit_bio(WRITE_SYNC, bio);
-	wait_for_completion(&compl);
 
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+	if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
		goto leave_with_eio;
 
	bio_put(bio);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6837fe87f3a6..945d1db98f26 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4723,8 +4723,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
	}
 
	if (!access_ok(VERIFY_READ, arg->clone_sources,
-			sizeof(*arg->clone_sources *
-			arg->clone_sources_count))) {
+			sizeof(*arg->clone_sources) *
+			arg->clone_sources_count)) {
		ret = -EFAULT;
		goto out;
	}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2d8ac1bf0cf9..d71a11d13dfa 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -432,7 +432,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
			} else {
				printk(KERN_INFO "btrfs: setting nodatacow\n");
			}
-			info->compress_type = BTRFS_COMPRESS_NONE;
			btrfs_clear_opt(info->mount_opt, COMPRESS);
			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
			btrfs_set_opt(info->mount_opt, NODATACOW);
@@ -461,7 +460,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
				btrfs_set_fs_incompat(info, COMPRESS_LZO);
			} else if (strncmp(args[0].from, "no", 2) == 0) {
				compress_type = "no";
-				info->compress_type = BTRFS_COMPRESS_NONE;
				btrfs_clear_opt(info->mount_opt, COMPRESS);
				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
				compress_force = false;
@@ -474,9 +472,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
				pr_info("btrfs: force %s compression\n",
					compress_type);
-			} else
+			} else if (btrfs_test_opt(root, COMPRESS)) {
				pr_info("btrfs: use %s compression\n",
					compress_type);
+			}
			break;
		case Opt_ssd:
			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..ec3ba43b9faa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,13 +210,17 @@ static int readpage_nounlock(struct file *filp, struct page *page)
	if (err < 0) {
		SetPageError(page);
		goto out;
-	} else if (err < PAGE_CACHE_SIZE) {
+	} else {
+		if (err < PAGE_CACHE_SIZE) {
		/* zero fill remainder of page */
		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+		} else {
+			flush_dcache_page(page);
+		}
	}
	SetPageUptodate(page);
 
-	if (err == 0)
+	if (err >= 0)
		ceph_readpage_to_fscache(inode, page);
 
 out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 7db2e6ca4b8f..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 {
	struct ceph_inode_info *ci = ceph_inode(inode);
 
+	if (!PageFsCache(page))
+		return;
+
	fscache_wait_on_page_write(ci->fscache, page);
	fscache_uncache_page(ci->fscache, page);
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
	struct ceph_mds_session *session = cap->session;
	struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 
	/* remove from session list */
	spin_lock(&session->s_cap_lock);
+	/*
+	 * s_cap_reconnect is protected by s_cap_lock. no one changes
+	 * s_cap_gen while session is in the reconnect state.
+	 */
+	if (queue_release &&
+	    (!session->s_cap_reconnect ||
+	     cap->cap_gen == session->s_cap_gen))
+		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+				    cap->mseq, cap->issue_seq);
+
	if (session->s_cap_iterator == cap) {
		/* not yet, we are iterating over this very cap */
		dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
	struct ceph_mds_cap_release *head;
	struct ceph_mds_cap_item *item;
 
-	spin_lock(&session->s_cap_lock);
	BUG_ON(!session->s_num_cap_releases);
	msg = list_first_entry(&session->s_cap_releases,
			       struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
		     (int)CEPH_CAPS_PER_RELEASE,
		     (int)msg->front.iov_len);
	}
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
	p = rb_first(&ci->i_caps);
	while (p) {
		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-		struct ceph_mds_session *session = cap->session;
-
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
		p = rb_next(p);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
	}
 }
 
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
			}
			spin_unlock(&mdsc->cap_dirty_lock);
		}
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, false);
	}
	/* else, we already released it */
 
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	if (!inode) {
		dout(" i don't have ino %llx\n", vino.ino);
 
-		if (op == CEPH_CAP_OP_IMPORT)
+		if (op == CEPH_CAP_OP_IMPORT) {
+			spin_lock(&session->s_cap_lock);
			__queue_cap_release(session, vino.ino, cap_id,
					    mseq, seq);
+			spin_unlock(&session->s_cap_lock);
+		}
		goto flush_cap_releases;
	}
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
	}
 
	/* note next offset and last dentry name */
+	rinfo = &req->r_reply_info;
+	if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			fi->next_offset = 2;
+		else
+			fi->next_offset = 0;
+		off = fi->next_offset;
+	}
	fi->offset = fi->next_offset;
	fi->last_readdir = req;
+	fi->frag = frag;
 
	if (req->r_reply_info.dir_end) {
		kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
		else
			fi->next_offset = 0;
	} else {
-		rinfo = &req->r_reply_info;
		err = note_last_dentry(fi,
			       rinfo->dir_dname[rinfo->dir_nr-1],
			       rinfo->dir_dname_len[rinfo->dir_nr-1]);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..278fd2891288 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
	int issued = 0, implemented;
	struct timespec mtime, atime, ctime;
	u32 nsplits;
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
	struct ceph_buffer *xattr_blob = NULL;
	int err = 0;
	int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
	/* FIXME: move me up, if/when version reflects fragtree changes */
	nsplits = le32_to_cpu(info->fragtree.nsplits);
	mutex_lock(&ci->i_fragtree_mutex);
+	rb_node = rb_first(&ci->i_fragtree);
	for (i = 0; i < nsplits; i++) {
		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
-		if (IS_ERR(frag))
-			continue;
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
	mutex_unlock(&ci->i_fragtree_mutex);
 
	/* were we issued a capability? */
@@ -953,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
	struct ceph_mds_reply_inode *ininfo;
	struct ceph_vino vino;
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	int i = 0;
	int err = 0;
 
	dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1014,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		}
	}
 
+	if (rinfo->head->is_target) {
+		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+
+		in = ceph_get_inode(sb, vino);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			goto done;
+		}
+		req->r_target_inode = in;
+
+		err = fill_inode(in, &rinfo->targeti, NULL,
+				session, req->r_request_started,
+				(le32_to_cpu(rinfo->head->result) == 0) ?
+				req->r_fmode : -1,
+				&req->r_caps_reservation);
+		if (err < 0) {
+			pr_err("fill_inode badness %p %llx.%llx\n",
+				in, ceph_vinop(in));
+			goto done;
+		}
+	}
+
	/*
	 * ignore null lease/binding on snapdir ENOENT, or else we
	 * will have trouble splicing in the virtual snapdir later
@@ -1083,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		     ceph_dentry(req->r_old_dentry)->offset);
 
		dn = req->r_old_dentry;  /* use old_dentry */
-		in = dn->d_inode;
	}
 
	/* null dentry? */
@@ -1105,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
	}
 
	/* attach proper inode */
-	ininfo = rinfo->targeti.in;
-	vino.ino = le64_to_cpu(ininfo->ino);
-	vino.snap = le64_to_cpu(ininfo->snapid);
-	in = dn->d_inode;
-	if (!in) {
-		in = ceph_get_inode(sb, vino);
-		if (IS_ERR(in)) {
-			pr_err("fill_trace bad get_inode "
-			       "%llx.%llx\n", vino.ino, vino.snap);
-			err = PTR_ERR(in);
-			d_drop(dn);
-			goto done;
-		}
+	if (!dn->d_inode) {
+		ihold(in);
		dn = splice_dentry(dn, in, &have_lease, true);
		if (IS_ERR(dn)) {
			err = PTR_ERR(dn);
			goto done;
		}
		req->r_dentry = dn;  /* may have spliced */
-		ihold(in);
-	} else if (ceph_ino(in) == vino.ino &&
-		   ceph_snap(in) == vino.snap) {
-		ihold(in);
-	} else {
+	} else if (dn->d_inode && dn->d_inode != in) {
		dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-		     dn, in, ceph_ino(in), ceph_snap(in),
-		     vino.ino, vino.snap);
+		     dn, dn->d_inode, ceph_vinop(dn->d_inode),
+		     ceph_vinop(in));
		have_lease = false;
-		in = NULL;
	}
 
	if (have_lease)
		update_dentry_lease(dn, rinfo->dlease, session,
				    req->r_request_started);
	dout(" final dn %p\n", dn);
-	i++;
-	} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
-	    req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
+	} else if (!req->r_aborted &&
+		   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+		    req->r_op == CEPH_MDS_OP_MKSNAP)) {
		struct dentry *dn = req->r_dentry;
 
		/* fill out a snapdir LOOKUPSNAP dentry */
@@ -1152,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
		ininfo = rinfo->targeti.in;
		vino.ino = le64_to_cpu(ininfo->ino);
		vino.snap = le64_to_cpu(ininfo->snapid);
-		in = ceph_get_inode(sb, vino);
-		if (IS_ERR(in)) {
-			pr_err("fill_inode get_inode badness %llx.%llx\n",
-			       vino.ino, vino.snap);
-			err = PTR_ERR(in);
-			d_delete(dn);
-			goto done;
-		}
		dout(" linking snapped dir %p to dn %p\n", in, dn);
+		ihold(in);
		dn = splice_dentry(dn, in, NULL, true);
		if (IS_ERR(dn)) {
			err = PTR_ERR(dn);
			goto done;
		}
		req->r_dentry = dn;  /* may have spliced */
-		ihold(in);
-		rinfo->head->is_dentry = 1;  /* fool notrace handlers */
-	}
-
-	if (rinfo->head->is_target) {
-		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
-		if (in == NULL || ceph_ino(in) != vino.ino ||
-		    ceph_snap(in) != vino.snap) {
-			in = ceph_get_inode(sb, vino);
-			if (IS_ERR(in)) {
-				err = PTR_ERR(in);
-				goto done;
-			}
-		}
-		req->r_target_inode = in;
-
-		err = fill_inode(in,
-				 &rinfo->targeti, NULL,
-				 session, req->r_request_started,
-				 (le32_to_cpu(rinfo->head->result) == 0) ?
-				 req->r_fmode : -1,
-				 &req->r_caps_reservation);
-		if (err < 0) {
-			pr_err("fill_inode badness %p %llx.%llx\n",
-			       in, ceph_vinop(in));
-			goto done;
-		}
	}
-
 done:
	dout("fill_trace done err=%d\n", err);
	return err;
@@ -1247,11 +1240,23 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
	struct qstr dname;
	struct dentry *dn;
	struct inode *in;
-	int err = 0, i;
+	int err = 0, ret, i;
	struct inode *snapdir = NULL;
	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-	u64 frag = le32_to_cpu(rhead->args.readdir.frag);
	struct ceph_dentry_info *di;
+	u64 r_readdir_offset = req->r_readdir_offset;
+	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+	if (rinfo->dir_dir &&
+	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		dout("readdir_prepopulate got new frag %x -> %x\n",
+		     frag, le32_to_cpu(rinfo->dir_dir->frag));
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			r_readdir_offset = 2;
+		else
+			r_readdir_offset = 0;
+	}
 
	if (req->r_aborted)
		return readdir_prepopulate_inodes_only(req, session);
@@ -1268,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
		ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
	}
 
+	/* FIXME: release caps/leases if error occurs */
	for (i = 0; i < rinfo->dir_nr; i++) {
		struct ceph_vino vino;
 
@@ -1292,9 +1298,10 @@ retry_lookup:
				err = -ENOMEM;
				goto out;
			}
-			err = ceph_init_dentry(dn);
-			if (err < 0) {
+			ret = ceph_init_dentry(dn);
+			if (ret < 0) {
				dput(dn);
+				err = ret;
				goto out;
			}
		} else if (dn->d_inode &&
@@ -1314,9 +1321,6 @@ retry_lookup:
			spin_unlock(&parent->d_lock);
		}
 
-		di = dn->d_fsdata;
-		di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
-
		/* inode */
		if (dn->d_inode) {
			in = dn->d_inode;
@@ -1329,26 +1333,39 @@ retry_lookup:
				err = PTR_ERR(in);
				goto out;
			}
-			dn = splice_dentry(dn, in, NULL, false);
-			if (IS_ERR(dn))
-				dn = NULL;
		}
 
		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
			       req->r_request_started, -1,
			       &req->r_caps_reservation) < 0) {
			pr_err("fill_inode badness on %p\n", in);
+			if (!dn->d_inode)
+				iput(in);
+			d_drop(dn);
			goto next_item;
		}
-		if (dn)
-			update_dentry_lease(dn, rinfo->dir_dlease[i],
-					    req->r_session,
-					    req->r_request_started);
+
+		if (!dn->d_inode) {
+			dn = splice_dentry(dn, in, NULL, false);
+			if (IS_ERR(dn)) {
+				err = PTR_ERR(dn);
+				dn = NULL;
+				goto next_item;
+			}
+		}
+
+		di = dn->d_fsdata;
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+
+		update_dentry_lease(dn, rinfo->dir_dlease[i],
+				    req->r_session,
+				    req->r_request_started);
 next_item:
		if (dn)
			dput(dn);
	}
-	req->r_did_prepopulate = true;
+	if (err == 0)
+		req->r_did_prepopulate = true;
 
 out:
	if (snapdir) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
43 */ 43 */
44 44
45struct ceph_reconnect_state { 45struct ceph_reconnect_state {
46 int nr_caps;
46 struct ceph_pagelist *pagelist; 47 struct ceph_pagelist *pagelist;
47 bool flock; 48 bool flock;
48}; 49};
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
443 INIT_LIST_HEAD(&s->s_waiting); 444 INIT_LIST_HEAD(&s->s_waiting);
444 INIT_LIST_HEAD(&s->s_unsafe); 445 INIT_LIST_HEAD(&s->s_unsafe);
445 s->s_num_cap_releases = 0; 446 s->s_num_cap_releases = 0;
447 s->s_cap_reconnect = 0;
446 s->s_cap_iterator = NULL; 448 s->s_cap_iterator = NULL;
447 INIT_LIST_HEAD(&s->s_cap_releases); 449 INIT_LIST_HEAD(&s->s_cap_releases);
448 INIT_LIST_HEAD(&s->s_cap_releases_done); 450 INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
642 req->r_unsafe_dir = NULL; 644 req->r_unsafe_dir = NULL;
643 } 645 }
644 646
647 complete_all(&req->r_safe_completion);
648
645 ceph_mdsc_put_request(req); 649 ceph_mdsc_put_request(req);
646} 650}
647 651
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
986 dout("removing cap %p, ci is %p, inode is %p\n", 990 dout("removing cap %p, ci is %p, inode is %p\n",
987 cap, ci, &ci->vfs_inode); 991 cap, ci, &ci->vfs_inode);
988 spin_lock(&ci->i_ceph_lock); 992 spin_lock(&ci->i_ceph_lock);
989 __ceph_remove_cap(cap); 993 __ceph_remove_cap(cap, false);
990 if (!__ceph_is_any_real_caps(ci)) { 994 if (!__ceph_is_any_real_caps(ci)) {
991 struct ceph_mds_client *mdsc = 995 struct ceph_mds_client *mdsc =
992 ceph_sb_to_client(inode->i_sb)->mdsc; 996 ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1231 session->s_trim_caps--; 1235 session->s_trim_caps--;
1232 if (oissued) { 1236 if (oissued) {
1233 /* we aren't the only cap.. just remove us */ 1237 /* we aren't the only cap.. just remove us */
1234 __queue_cap_release(session, ceph_ino(inode), cap->cap_id, 1238 __ceph_remove_cap(cap, true);
1235 cap->mseq, cap->issue_seq);
1236 __ceph_remove_cap(cap);
1237 } else { 1239 } else {
1238 /* try to drop referring dentries */ 1240 /* try to drop referring dentries */
1239 spin_unlock(&ci->i_ceph_lock); 1241 spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1416 unsigned num; 1418 unsigned num;
1417 1419
1418 dout("discard_cap_releases mds%d\n", session->s_mds); 1420 dout("discard_cap_releases mds%d\n", session->s_mds);
1419 spin_lock(&session->s_cap_lock);
1420 1421
1421 /* zero out the in-progress message */ 1422 /* zero out the in-progress message */
1422 msg = list_first_entry(&session->s_cap_releases, 1423 msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1443 msg->front.iov_len = sizeof(*head); 1444 msg->front.iov_len = sizeof(*head);
1444 list_add(&msg->list_head, &session->s_cap_releases); 1445 list_add(&msg->list_head, &session->s_cap_releases);
1445 } 1446 }
1446
1447 spin_unlock(&session->s_cap_lock);
1448} 1447}
1449 1448
1450/* 1449/*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
1875 int mds = -1; 1874 int mds = -1;
1876 int err = -EAGAIN; 1875 int err = -EAGAIN;
1877 1876
1878 if (req->r_err || req->r_got_result) 1877 if (req->r_err || req->r_got_result) {
1878 if (req->r_aborted)
1879 __unregister_request(mdsc, req);
1879 goto out; 1880 goto out;
1881 }
1880 1882
1881 if (req->r_timeout && 1883 if (req->r_timeout &&
1882 time_after_eq(jiffies, req->r_started + req->r_timeout)) { 1884 time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2186 if (head->safe) { 2188 if (head->safe) {
2187 req->r_got_safe = true; 2189 req->r_got_safe = true;
2188 __unregister_request(mdsc, req); 2190 __unregister_request(mdsc, req);
2189 complete_all(&req->r_safe_completion);
2190 2191
2191 if (req->r_got_unsafe) { 2192 if (req->r_got_unsafe) {
2192 /* 2193 /*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2238 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2239 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2239 if (err == 0) { 2240 if (err == 0) {
2240 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || 2241 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
2241 req->r_op == CEPH_MDS_OP_LSSNAP) && 2242 req->r_op == CEPH_MDS_OP_LSSNAP))
2242 rinfo->dir_nr)
2243 ceph_readdir_prepopulate(req, req->r_session); 2243 ceph_readdir_prepopulate(req, req->r_session);
2244 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2244 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2245 } 2245 }
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2490 cap->seq = 0; /* reset cap seq */ 2490 cap->seq = 0; /* reset cap seq */
2491 cap->issue_seq = 0; /* and issue_seq */ 2491 cap->issue_seq = 0; /* and issue_seq */
2492 cap->mseq = 0; /* and migrate_seq */ 2492 cap->mseq = 0; /* and migrate_seq */
2493 cap->cap_gen = cap->session->s_cap_gen;
2493 2494
2494 if (recon_state->flock) { 2495 if (recon_state->flock) {
2495 rec.v2.cap_id = cpu_to_le64(cap->cap_id); 2496 rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
2552 } else { 2553 } else {
2553 err = ceph_pagelist_append(pagelist, &rec, reclen); 2554 err = ceph_pagelist_append(pagelist, &rec, reclen);
2554 } 2555 }
2556
2557 recon_state->nr_caps++;
2555out_free: 2558out_free:
2556 kfree(path); 2559 kfree(path);
2557out_dput: 2560out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2579 struct rb_node *p; 2582 struct rb_node *p;
2580 int mds = session->s_mds; 2583 int mds = session->s_mds;
2581 int err = -ENOMEM; 2584 int err = -ENOMEM;
2585 int s_nr_caps;
2582 struct ceph_pagelist *pagelist; 2586 struct ceph_pagelist *pagelist;
2583 struct ceph_reconnect_state recon_state; 2587 struct ceph_reconnect_state recon_state;
2584 2588
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2610 dout("session %p state %s\n", session, 2614 dout("session %p state %s\n", session,
2611 session_state_name(session->s_state)); 2615 session_state_name(session->s_state));
2612 2616
2617 spin_lock(&session->s_gen_ttl_lock);
2618 session->s_cap_gen++;
2619 spin_unlock(&session->s_gen_ttl_lock);
2620
2621 spin_lock(&session->s_cap_lock);
2622 /*
2623 * notify __ceph_remove_cap() that we are composing the cap reconnect.
2624 * If a cap gets released before being added to the cap reconnect,
2625 * __ceph_remove_cap() should skip queuing the cap release.
2626 */
2627 session->s_cap_reconnect = 1;
2613 /* drop old cap expires; we're about to reestablish that state */ 2628 /* drop old cap expires; we're about to reestablish that state */
2614 discard_cap_releases(mdsc, session); 2629 discard_cap_releases(mdsc, session);
2630 spin_unlock(&session->s_cap_lock);
2615 2631
2616 /* traverse this session's caps */ 2632 /* traverse this session's caps */
2617 err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); 2633 s_nr_caps = session->s_nr_caps;
2634 err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
2618 if (err) 2635 if (err)
2619 goto fail; 2636 goto fail;
2620 2637
2638 recon_state.nr_caps = 0;
2621 recon_state.pagelist = pagelist; 2639 recon_state.pagelist = pagelist;
2622 recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; 2640 recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
2623 err = iterate_session_caps(session, encode_caps_cb, &recon_state); 2641 err = iterate_session_caps(session, encode_caps_cb, &recon_state);
2624 if (err < 0) 2642 if (err < 0)
2625 goto fail; 2643 goto fail;
2626 2644
2645 spin_lock(&session->s_cap_lock);
2646 session->s_cap_reconnect = 0;
2647 spin_unlock(&session->s_cap_lock);
2648
2627 /* 2649 /*
2628 * snaprealms. we provide mds with the ino, seq (version), and 2650 * snaprealms. we provide mds with the ino, seq (version), and
2629 * parent for all of our realms. If the mds has any newer info, 2651 * parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2646 2668
2647 if (recon_state.flock) 2669 if (recon_state.flock)
2648 reply->hdr.version = cpu_to_le16(2); 2670 reply->hdr.version = cpu_to_le16(2);
2649 if (pagelist->length) { 2671
2650 /* set up outbound data if we have any */ 2672 /* raced with cap release? */
2651 reply->hdr.data_len = cpu_to_le32(pagelist->length); 2673 if (s_nr_caps != recon_state.nr_caps) {
2652 ceph_msg_data_add_pagelist(reply, pagelist); 2674 struct page *page = list_first_entry(&pagelist->head,
2675 struct page, lru);
2676 __le32 *addr = kmap_atomic(page);
2677 *addr = cpu_to_le32(recon_state.nr_caps);
2678 kunmap_atomic(addr);
2653 } 2679 }
2680
2681 reply->hdr.data_len = cpu_to_le32(pagelist->length);
2682 ceph_msg_data_add_pagelist(reply, pagelist);
2654 ceph_con_send(&session->s_con, reply); 2683 ceph_con_send(&session->s_con, reply);
2655 2684
2656 mutex_unlock(&session->s_mutex); 2685 mutex_unlock(&session->s_mutex);
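
The reconnect rework above must cope with caps being released while the reconnect message is composed: it encodes s_nr_caps as a provisional count, tallies the entries actually written in recon_state.nr_caps, and patches the count at the head of the pagelist if the two disagree. A hedged sketch of that write-placeholder-then-patch pattern, where encode_all_caps() and rewrite_first_le32() are hypothetical helpers:

	/* Emit a provisional count, encode the entries (a racing release
	 * may skip some), then rewrite the count in place if needed. */
	static int encode_caps_with_count(struct ceph_pagelist *pl, int expected)
	{
		int actual;

		ceph_pagelist_encode_32(pl, expected);	/* placeholder */
		actual = encode_all_caps(pl);		/* hypothetical */
		if (actual < 0)
			return actual;
		if (actual != expected)
			rewrite_first_le32(pl, actual);	/* hypothetical */
		return 0;
	}
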
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
132 struct list_head s_caps; /* all caps issued by this session */ 132 struct list_head s_caps; /* all caps issued by this session */
133 int s_nr_caps, s_trim_caps; 133 int s_nr_caps, s_trim_caps;
134 int s_num_cap_releases; 134 int s_num_cap_releases;
135 int s_cap_reconnect;
135 struct list_head s_cap_releases; /* waiting cap_release messages */ 136 struct list_head s_cap_releases; /* waiting cap_release messages */
136 struct list_head s_cap_releases_done; /* ready to send */ 137 struct list_head s_cap_releases_done; /* ready to send */
137 struct ceph_cap *s_cap_iterator; 138 struct ceph_cap *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
741 int fmode, unsigned issued, unsigned wanted, 741 int fmode, unsigned issued, unsigned wanted,
742 unsigned cap, unsigned seq, u64 realmino, int flags, 742 unsigned cap, unsigned seq, u64 realmino, int flags,
743 struct ceph_cap_reservation *caps_reservation); 743 struct ceph_cap_reservation *caps_reservation);
744extern void __ceph_remove_cap(struct ceph_cap *cap); 744extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
745static inline void ceph_remove_cap(struct ceph_cap *cap)
746{
747 spin_lock(&cap->ci->i_ceph_lock);
748 __ceph_remove_cap(cap);
749 spin_unlock(&cap->ci->i_ceph_lock);
750}
751extern void ceph_put_cap(struct ceph_mds_client *mdsc, 745extern void ceph_put_cap(struct ceph_mds_client *mdsc,
752 struct ceph_cap *cap); 746 struct ceph_cap *cap);
753 747
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d9ea7ada1378..f918a998a087 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -384,6 +384,7 @@ struct smb_version_operations {
384 int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, 384 int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
385 struct cifsFileInfo *target_file, u64 src_off, u64 len, 385 struct cifsFileInfo *target_file, u64 src_off, u64 len,
386 u64 dest_off); 386 u64 dest_off);
387 int (*validate_negotiate)(const unsigned int, struct cifs_tcon *);
387}; 388};
388 389
389struct smb_version_values { 390struct smb_version_values {
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 409b45eefe70..77492301cc2b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -26,13 +26,15 @@
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/btrfs.h>
30#include "cifspdu.h" 29#include "cifspdu.h"
31#include "cifsglob.h" 30#include "cifsglob.h"
32#include "cifsproto.h" 31#include "cifsproto.h"
33#include "cifs_debug.h" 32#include "cifs_debug.h"
34#include "cifsfs.h" 33#include "cifsfs.h"
35 34
35#define CIFS_IOCTL_MAGIC 0xCF
36#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
37
36static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, 38static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
37 unsigned long srcfd, u64 off, u64 len, u64 destoff) 39 unsigned long srcfd, u64 off, u64 len, u64 destoff)
38{ 40{
@@ -213,7 +215,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
213 cifs_dbg(FYI, "set compress flag rc %d\n", rc); 215 cifs_dbg(FYI, "set compress flag rc %d\n", rc);
214 } 216 }
215 break; 217 break;
216 case BTRFS_IOC_CLONE: 218 case CIFS_IOC_COPYCHUNK_FILE:
217 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); 219 rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0);
218 break; 220 break;
219 default: 221 default:
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 11dde4b24f8a..757da3e54d3d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -532,7 +532,10 @@ smb2_clone_range(const unsigned int xid,
532 int rc; 532 int rc;
533 unsigned int ret_data_len; 533 unsigned int ret_data_len;
534 struct copychunk_ioctl *pcchunk; 534 struct copychunk_ioctl *pcchunk;
535 char *retbuf = NULL; 535 struct copychunk_ioctl_rsp *retbuf = NULL;
536 struct cifs_tcon *tcon;
537 int chunks_copied = 0;
538 bool chunk_sizes_updated = false;
536 539
537 pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); 540 pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
538 541
@@ -547,27 +550,96 @@ smb2_clone_range(const unsigned int xid,
547 550
548 /* Note: request_res_key sets res_key null only if rc !=0 */ 551 /* Note: request_res_key sets res_key null only if rc !=0 */
549 if (rc) 552 if (rc)
550 return rc; 553 goto cchunk_out;
551 554
552 /* For now array only one chunk long, will make more flexible later */ 555 /* For now array only one chunk long, will make more flexible later */
553 pcchunk->ChunkCount = __constant_cpu_to_le32(1); 556 pcchunk->ChunkCount = __constant_cpu_to_le32(1);
554 pcchunk->Reserved = 0; 557 pcchunk->Reserved = 0;
555 pcchunk->SourceOffset = cpu_to_le64(src_off);
556 pcchunk->TargetOffset = cpu_to_le64(dest_off);
557 pcchunk->Length = cpu_to_le32(len);
558 pcchunk->Reserved2 = 0; 558 pcchunk->Reserved2 = 0;
559 559
560 /* Request that server copy to target from src file identified by key */ 560 tcon = tlink_tcon(trgtfile->tlink);
561 rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink),
562 trgtfile->fid.persistent_fid,
563 trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
564 true /* is_fsctl */, (char *)pcchunk,
565 sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len);
566 561
567 /* BB need to special case rc = EINVAL to alter chunk size */ 562 while (len > 0) {
563 pcchunk->SourceOffset = cpu_to_le64(src_off);
564 pcchunk->TargetOffset = cpu_to_le64(dest_off);
565 pcchunk->Length =
566 cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
568 567
569 cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len); 568 /* Request server copy to target from src identified by key */
569 rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
570 trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
571 true /* is_fsctl */, (char *)pcchunk,
572 sizeof(struct copychunk_ioctl), (char **)&retbuf,
573 &ret_data_len);
574 if (rc == 0) {
575 if (ret_data_len !=
576 sizeof(struct copychunk_ioctl_rsp)) {
577 cifs_dbg(VFS, "invalid cchunk response size\n");
578 rc = -EIO;
579 goto cchunk_out;
580 }
581 if (retbuf->TotalBytesWritten == 0) {
582 cifs_dbg(FYI, "no bytes copied\n");
583 rc = -EIO;
584 goto cchunk_out;
585 }
586 /*
587 * Check if server claimed to write more than we asked
588 */
589 if (le32_to_cpu(retbuf->TotalBytesWritten) >
590 le32_to_cpu(pcchunk->Length)) {
591 cifs_dbg(VFS, "invalid copy chunk response\n");
592 rc = -EIO;
593 goto cchunk_out;
594 }
595 if (le32_to_cpu(retbuf->ChunksWritten) != 1) {
596 cifs_dbg(VFS, "invalid num chunks written\n");
597 rc = -EIO;
598 goto cchunk_out;
599 }
600 chunks_copied++;
601
602 src_off += le32_to_cpu(retbuf->TotalBytesWritten);
603 dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
604 len -= le32_to_cpu(retbuf->TotalBytesWritten);
605
606 cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
607 le32_to_cpu(retbuf->ChunksWritten),
608 le32_to_cpu(retbuf->ChunkBytesWritten),
609 le32_to_cpu(retbuf->TotalBytesWritten));
610 } else if (rc == -EINVAL) {
611 if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
612 goto cchunk_out;
613
614 cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n",
615 le32_to_cpu(retbuf->ChunksWritten),
616 le32_to_cpu(retbuf->ChunkBytesWritten),
617 le32_to_cpu(retbuf->TotalBytesWritten));
618
619 /*
620 * Check if this is the first request using these sizes,
621 * (i.e. check whether a copy already succeeded with the original
622 * sizes, and whether the server gave us different sizes after we
623 * already updated the max sizes on a previous request).
624 * If not, why is the server returning an error now?
625 */
626 if ((chunks_copied != 0) || chunk_sizes_updated)
627 goto cchunk_out;
628
629 /* Check that server is not asking us to grow size */
630 if (le32_to_cpu(retbuf->ChunkBytesWritten) <
631 tcon->max_bytes_chunk)
632 tcon->max_bytes_chunk =
633 le32_to_cpu(retbuf->ChunkBytesWritten);
634 else
635 goto cchunk_out; /* server gave us bogus size */
636
637 /* No need to change MaxChunks since already set to 1 */
638 chunk_sizes_updated = true;
639 }
640 }
570 641
642cchunk_out:
571 kfree(pcchunk); 643 kfree(pcchunk);
572 return rc; 644 return rc;
573} 645}
@@ -1247,6 +1319,7 @@ struct smb_version_operations smb30_operations = {
1247 .create_lease_buf = smb3_create_lease_buf, 1319 .create_lease_buf = smb3_create_lease_buf,
1248 .parse_lease_buf = smb3_parse_lease_buf, 1320 .parse_lease_buf = smb3_parse_lease_buf,
1249 .clone_range = smb2_clone_range, 1321 .clone_range = smb2_clone_range,
1322 .validate_negotiate = smb3_validate_negotiate,
1250}; 1323};
1251 1324
1252struct smb_version_values smb20_values = { 1325struct smb_version_values smb20_values = {
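
The rewritten smb2_clone_range() above turns a single fire-and-forget ioctl into a loop: each FSCTL_SRV_COPYCHUNK_WRITE request copies at most tcon->max_bytes_chunk bytes, and if the very first attempt fails with -EINVAL the chunk size is lowered once to what the server advertises before retrying. A condensed sketch of that control flow, where do_copychunk() and server_chunk_limit() are hypothetical stand-ins for the SMB2_ioctl() round trip and the size the server reports back:

	/* Hedged sketch of the chunked server-side copy loop above. */
	static int clone_in_chunks(u64 src_off, u64 dst_off, u64 len, u32 max_chunk)
	{
		bool resized = false;

		while (len > 0) {
			int got = do_copychunk(src_off, dst_off,
					       min_t(u32, len, max_chunk));

			if (got > 0) {			/* a chunk was copied */
				src_off += got;
				dst_off += got;
				len -= got;
			} else if (got == -EINVAL && !resized) {
				u32 server_max = server_chunk_limit();

				if (server_max >= max_chunk)
					return -EIO;	/* bogus size from server */
				max_chunk = server_max;	/* retry once, smaller */
				resized = true;
			} else {
				return got ? got : -EIO;
			}
		}
		return 0;
	}
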
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d65270c290a1..2013234b73ad 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -454,6 +454,81 @@ neg_exit:
454 return rc; 454 return rc;
455} 455}
456 456
457int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
458{
459 int rc = 0;
460 struct validate_negotiate_info_req vneg_inbuf;
461 struct validate_negotiate_info_rsp *pneg_rsp;
462 u32 rsplen;
463
464 cifs_dbg(FYI, "validate negotiate\n");
465
466 /*
467 * validation ioctl must be signed, so there is no point sending this
468 * if we cannot sign it. We could eventually change this to selectively
469 * sign just this, the first and only signed request on a connection.
470 * This is good enough for now since a user who wants better security
471 * would also enable signing on the mount. Having validation of
472 * negotiate info for signed connections helps reduce attack vectors.
473 */
474 if (tcon->ses->server->sign == false)
475 return 0; /* validation requires signing */
476
477 vneg_inbuf.Capabilities =
478 cpu_to_le32(tcon->ses->server->vals->req_capabilities);
479 memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
480
481 if (tcon->ses->sign)
482 vneg_inbuf.SecurityMode =
483 cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED);
484 else if (global_secflags & CIFSSEC_MAY_SIGN)
485 vneg_inbuf.SecurityMode =
486 cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED);
487 else
488 vneg_inbuf.SecurityMode = 0;
489
490 vneg_inbuf.DialectCount = cpu_to_le16(1);
491 vneg_inbuf.Dialects[0] =
492 cpu_to_le16(tcon->ses->server->vals->protocol_id);
493
494 rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
495 FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
496 (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req),
497 (char **)&pneg_rsp, &rsplen);
498
499 if (rc != 0) {
500 cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc);
501 return -EIO;
502 }
503
504 if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
505 cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
506 return -EIO;
507 }
508
509 /* check validate negotiate info response matches what we got earlier */
510 if (pneg_rsp->Dialect !=
511 cpu_to_le16(tcon->ses->server->vals->protocol_id))
512 goto vneg_out;
513
514 if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
515 goto vneg_out;
516
517 /* do not validate server guid because not saved at negprot time yet */
518
519 if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND |
520 SMB2_LARGE_FILES) != tcon->ses->server->capabilities)
521 goto vneg_out;
522
523 /* validate negotiate successful */
524 cifs_dbg(FYI, "validate negotiate info successful\n");
525 return 0;
526
527vneg_out:
528 cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n");
529 return -EIO;
530}
531
457int 532int
458SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, 533SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
459 const struct nls_table *nls_cp) 534 const struct nls_table *nls_cp)
@@ -829,6 +904,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
829 ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) 904 ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
830 cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); 905 cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
831 init_copy_chunk_defaults(tcon); 906 init_copy_chunk_defaults(tcon);
907 if (tcon->ses->server->ops->validate_negotiate)
908 rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
832tcon_exit: 909tcon_exit:
833 free_rsp_buf(resp_buftype, rsp); 910 free_rsp_buf(resp_buftype, rsp);
834 kfree(unc_path); 911 kfree(unc_path);
@@ -1214,10 +1291,17 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1214 rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); 1291 rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
1215 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; 1292 rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
1216 1293
1217 if (rc != 0) { 1294 if ((rc != 0) && (rc != -EINVAL)) {
1218 if (tcon) 1295 if (tcon)
1219 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); 1296 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1220 goto ioctl_exit; 1297 goto ioctl_exit;
1298 } else if (rc == -EINVAL) {
1299 if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
1300 (opcode != FSCTL_SRV_COPYCHUNK)) {
1301 if (tcon)
1302 cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
1303 goto ioctl_exit;
1304 }
1221 } 1305 }
1222 1306
1223 /* check if caller wants to look at return data or just return rc */ 1307 /* check if caller wants to look at return data or just return rc */
@@ -2154,11 +2238,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
2154 rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); 2238 rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
2155 rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; 2239 rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
2156 2240
2157 if (rc != 0) { 2241 if (rc != 0)
2158 cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); 2242 cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
2159 goto out; 2243
2160 }
2161out:
2162 free_rsp_buf(resp_buftype, rsp); 2244 free_rsp_buf(resp_buftype, rsp);
2163 kfree(iov); 2245 kfree(iov);
2164 return rc; 2246 return rc;
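
smb3_validate_negotiate() above is a downgrade defence: the capabilities, client GUID, security mode, and dialect are re-sent over a signed FSCTL_VALIDATE_NEGOTIATE_INFO ioctl, and any mismatch with the signed echo fails the tree connect with -EIO, since the original negotiate exchange is unsigned and could have been tampered with. The comparison reduces to a check like this sketch (abbreviated, hypothetical struct names):

	/* The signed response must match what was negotiated in the clear,
	 * otherwise assume a man-in-the-middle altered the negotiation. */
	static int vneg_matches(const struct vneg_echo *rsp,
				const struct negotiated *neg)
	{
		if (rsp->dialect != neg->dialect ||
		    rsp->sec_mode != neg->sec_mode ||
		    rsp->capabilities != neg->capabilities)
			return -EIO;
		return 0;
	}
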
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f88320bbb477..2022c542ea3a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -577,13 +577,19 @@ struct copychunk_ioctl_rsp {
577 __le32 TotalBytesWritten; 577 __le32 TotalBytesWritten;
578} __packed; 578} __packed;
579 579
580/* Response and Request are the same format */ 580struct validate_negotiate_info_req {
581struct validate_negotiate_info {
582 __le32 Capabilities; 581 __le32 Capabilities;
583 __u8 Guid[SMB2_CLIENT_GUID_SIZE]; 582 __u8 Guid[SMB2_CLIENT_GUID_SIZE];
584 __le16 SecurityMode; 583 __le16 SecurityMode;
585 __le16 DialectCount; 584 __le16 DialectCount;
586 __le16 Dialect[1]; 585 __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
586} __packed;
587
588struct validate_negotiate_info_rsp {
589 __le32 Capabilities;
590 __u8 Guid[SMB2_CLIENT_GUID_SIZE];
591 __le16 SecurityMode;
592 __le16 Dialect; /* Dialect in use for the connection */
587} __packed; 593} __packed;
588 594
589#define RSS_CAPABLE 0x00000001 595#define RSS_CAPABLE 0x00000001
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index b4eea105b08c..93adc64666f3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -162,5 +162,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
162 struct smb2_lock_element *buf); 162 struct smb2_lock_element *buf);
163extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, 163extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
164 __u8 *lease_key, const __le32 lease_state); 164 __u8 *lease_key, const __le32 lease_state);
165extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
165 166
166#endif /* _SMB2PROTO_H */ 167#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index a4b2391fe66e..0e538b5c9622 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -90,7 +90,7 @@
90#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ 90#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */
91#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ 91#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
92#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ 92#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
93#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ 93#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
94/* Perform server-side data movement */ 94/* Perform server-side data movement */
95#define FSCTL_SRV_COPYCHUNK 0x001440F2 95#define FSCTL_SRV_COPYCHUNK 0x001440F2
96#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 96#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300b16e2..6055d61811d3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -192,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
192 if (!tcount) 192 if (!tcount)
193 return 0; 193 return 0;
194 } 194 }
195 mask = ~(~0ul << tcount*8); 195 mask = bytemask_from_count(tcount);
196 return unlikely(!!((a ^ b) & mask)); 196 return unlikely(!!((a ^ b) & mask));
197} 197}
198 198
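
bytemask_from_count(tcount) is just a named form of the open-coded ~(~0ul << tcount*8): a mask covering the low tcount bytes, letting the word-at-a-time name compare ignore bytes beyond the name length. A worked example (little-endian, 64-bit; the expression is only defined for tcount < sizeof(unsigned long), which the surrounding loop guarantees):

	/* tcount = 3: ~(~0ul << 24) == 0x0000000000ffffff, so
	 * (a ^ b) & mask is nonzero iff the first 3 bytes differ. */
	unsigned long mask = ~(~0ul << 3 * 8);
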
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 79b65c3b9e87..8b5e2584c840 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1852,8 +1852,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1852 goto error_tgt_fput; 1852 goto error_tgt_fput;
1853 1853
1854 /* Check if EPOLLWAKEUP is allowed */ 1854 /* Check if EPOLLWAKEUP is allowed */
1855 if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) 1855 ep_take_care_of_epollwakeup(&epds);
1856 epds.events &= ~EPOLLWAKEUP;
1857 1856
1858 /* 1857 /*
1859 * We have to check that the file structure underneath the file descriptor 1858 * We have to check that the file structure underneath the file descriptor
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1493 sb->s_blocksize - offset : towrite; 1493 sb->s_blocksize - offset : towrite;
1494 1494
1495 tmp_bh.b_state = 0; 1495 tmp_bh.b_state = 0;
1496 tmp_bh.b_size = sb->s_blocksize;
1496 err = ext2_get_block(inode, blk, &tmp_bh, 1); 1497 err = ext2_get_block(inode, blk, &tmp_bh, 1);
1497 if (err < 0) 1498 if (err < 0)
1498 goto out; 1499 goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e6185031c1cc..ece55565b9cd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -268,6 +268,16 @@ struct ext4_io_submit {
268/* Translate # of blks to # of clusters */ 268/* Translate # of blks to # of clusters */
269#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ 269#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
270 (sbi)->s_cluster_bits) 270 (sbi)->s_cluster_bits)
271/* Mask out the low bits to get the starting block of the cluster */
272#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
273 ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
274#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
275 ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
276/* Get the cluster offset */
277#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
278 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
279#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
280 ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
271 281
272/* 282/*
273 * Structure of a blocks group descriptor 283 * Structure of a blocks group descriptor
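
The new macros above give names to the two halves of bigalloc cluster arithmetic: CMASK rounds a block number down to the first block of its cluster, COFF yields the offset within the cluster. The explicit ext4_fsblk_t cast is the point of the patch: without it the complemented mask is a 32-bit unsigned value that zero-extends and wipes the high 32 bits of a 64-bit physical block number. A worked example with s_cluster_ratio = 16:

	/* With s_cluster_ratio = 16:
	 *   EXT4_PBLK_CMASK(s, 37) == 32   (first block of the cluster)
	 *   EXT4_PBLK_COFF(s, 37)  ==  5   (offset within the cluster)
	 * Why the cast matters once block numbers exceed 2^32:
	 *   0x100000025ULL & ~15U      == 0x20         (high bits lost)
	 *   0x100000025ULL & ~(u64)15  == 0x100000020  (correct) */
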
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112ab101..3fe29de832c8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
259 if (WARN_ON_ONCE(err)) { 259 if (WARN_ON_ONCE(err)) {
260 ext4_journal_abort_handle(where, line, __func__, bh, 260 ext4_journal_abort_handle(where, line, __func__, bh,
261 handle, err); 261 handle, err);
262 ext4_error_inode(inode, where, line,
263 bh->b_blocknr,
264 "journal_dirty_metadata failed: "
265 "handle type %u started at line %u, "
266 "credits %u/%u, errcode %d",
267 handle->h_type,
268 handle->h_line_no,
269 handle->h_requested_credits,
270 handle->h_buffer_credits, err);
262 } 271 }
263 } else { 272 } else {
264 if (inode) 273 if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf4f318..4410cc3d6ee2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
360{ 360{
361 ext4_fsblk_t block = ext4_ext_pblock(ext); 361 ext4_fsblk_t block = ext4_ext_pblock(ext);
362 int len = ext4_ext_get_actual_len(ext); 362 int len = ext4_ext_get_actual_len(ext);
363 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
364 ext4_lblk_t last = lblock + len - 1;
363 365
364 if (len == 0) 366 if (lblock > last)
365 return 0; 367 return 0;
366 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 368 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
367} 369}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
387 if (depth == 0) { 389 if (depth == 0) {
388 /* leaf entries */ 390 /* leaf entries */
389 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); 391 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
392 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
393 ext4_fsblk_t pblock = 0;
394 ext4_lblk_t lblock = 0;
395 ext4_lblk_t prev = 0;
396 int len = 0;
390 while (entries) { 397 while (entries) {
391 if (!ext4_valid_extent(inode, ext)) 398 if (!ext4_valid_extent(inode, ext))
392 return 0; 399 return 0;
400
401 /* Check for overlapping extents */
402 lblock = le32_to_cpu(ext->ee_block);
403 len = ext4_ext_get_actual_len(ext);
404 if ((lblock <= prev) && prev) {
405 pblock = ext4_ext_pblock(ext);
406 es->s_last_error_block = cpu_to_le64(pblock);
407 return 0;
408 }
393 ext++; 409 ext++;
394 entries--; 410 entries--;
411 prev = lblock + len - 1;
395 } 412 }
396 } else { 413 } else {
397 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); 414 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1834 depth = ext_depth(inode); 1851 depth = ext_depth(inode);
1835 if (!path[depth].p_ext) 1852 if (!path[depth].p_ext)
1836 goto out; 1853 goto out;
1837 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1854 b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
1838 b2 &= ~(sbi->s_cluster_ratio - 1);
1839 1855
1840 /* 1856 /*
1841 * get the next allocated block if the extent in the path 1857 * get the next allocated block if the extent in the path
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1845 b2 = ext4_ext_next_allocated_block(path); 1861 b2 = ext4_ext_next_allocated_block(path);
1846 if (b2 == EXT_MAX_BLOCKS) 1862 if (b2 == EXT_MAX_BLOCKS)
1847 goto out; 1863 goto out;
1848 b2 &= ~(sbi->s_cluster_ratio - 1); 1864 b2 = EXT4_LBLK_CMASK(sbi, b2);
1849 } 1865 }
1850 1866
1851 /* check for wrap through zero on extent logical start block*/ 1867 /* check for wrap through zero on extent logical start block*/
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2504 * extent, we have to mark the cluster as used (store negative 2520 * extent, we have to mark the cluster as used (store negative
2505 * cluster number in partial_cluster). 2521 * cluster number in partial_cluster).
2506 */ 2522 */
2507 unaligned = pblk & (sbi->s_cluster_ratio - 1); 2523 unaligned = EXT4_PBLK_COFF(sbi, pblk);
2508 if (unaligned && (ee_len == num) && 2524 if (unaligned && (ee_len == num) &&
2509 (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) 2525 (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
2510 *partial_cluster = EXT4_B2C(sbi, pblk); 2526 *partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2598 * accidentally freeing it later on 2614 * accidentally freeing it later on
2599 */ 2615 */
2600 pblk = ext4_ext_pblock(ex); 2616 pblk = ext4_ext_pblock(ex);
2601 if (pblk & (sbi->s_cluster_ratio - 1)) 2617 if (EXT4_PBLK_COFF(sbi, pblk))
2602 *partial_cluster = 2618 *partial_cluster =
2603 -((long long)EXT4_B2C(sbi, pblk)); 2619 -((long long)EXT4_B2C(sbi, pblk));
2604 ex--; 2620 ex--;
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
3753{ 3769{
3754 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3770 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3755 ext4_lblk_t lblk_start, lblk_end; 3771 ext4_lblk_t lblk_start, lblk_end;
3756 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); 3772 lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
3757 lblk_end = lblk_start + sbi->s_cluster_ratio - 1; 3773 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3758 3774
3759 return ext4_find_delalloc_range(inode, lblk_start, lblk_end); 3775 return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3812 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); 3828 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3813 3829
3814 /* Check towards left side */ 3830 /* Check towards left side */
3815 c_offset = lblk_start & (sbi->s_cluster_ratio - 1); 3831 c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
3816 if (c_offset) { 3832 if (c_offset) {
3817 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); 3833 lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
3818 lblk_to = lblk_from + c_offset - 1; 3834 lblk_to = lblk_from + c_offset - 1;
3819 3835
3820 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) 3836 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3822 } 3838 }
3823 3839
3824 /* Now check towards right. */ 3840 /* Now check towards right. */
3825 c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); 3841 c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
3826 if (allocated_clusters && c_offset) { 3842 if (allocated_clusters && c_offset) {
3827 lblk_from = lblk_start + num_blks; 3843 lblk_from = lblk_start + num_blks;
3828 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; 3844 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
4030 struct ext4_ext_path *path) 4046 struct ext4_ext_path *path)
4031{ 4047{
4032 struct ext4_sb_info *sbi = EXT4_SB(sb); 4048 struct ext4_sb_info *sbi = EXT4_SB(sb);
4033 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 4049 ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4034 ext4_lblk_t ex_cluster_start, ex_cluster_end; 4050 ext4_lblk_t ex_cluster_start, ex_cluster_end;
4035 ext4_lblk_t rr_cluster_start; 4051 ext4_lblk_t rr_cluster_start;
4036 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 4052 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
4048 (rr_cluster_start == ex_cluster_start)) { 4064 (rr_cluster_start == ex_cluster_start)) {
4049 if (rr_cluster_start == ex_cluster_end) 4065 if (rr_cluster_start == ex_cluster_end)
4050 ee_start += ee_len - 1; 4066 ee_start += ee_len - 1;
4051 map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + 4067 map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4052 c_offset;
4053 map->m_len = min(map->m_len, 4068 map->m_len = min(map->m_len,
4054 (unsigned) sbi->s_cluster_ratio - c_offset); 4069 (unsigned) sbi->s_cluster_ratio - c_offset);
4055 /* 4070 /*
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4203 */ 4218 */
4204 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; 4219 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
4205 newex.ee_block = cpu_to_le32(map->m_lblk); 4220 newex.ee_block = cpu_to_le32(map->m_lblk);
4206 cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 4221 cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4207 4222
4208 /* 4223 /*
4209 * If we are doing bigalloc, check to see if the extent returned 4224 * If we are doing bigalloc, check to see if the extent returned
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4271 * needed so that future calls to get_implied_cluster_alloc() 4286 * needed so that future calls to get_implied_cluster_alloc()
4272 * work correctly. 4287 * work correctly.
4273 */ 4288 */
4274 offset = map->m_lblk & (sbi->s_cluster_ratio - 1); 4289 offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4275 ar.len = EXT4_NUM_B2C(sbi, offset+allocated); 4290 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4276 ar.goal -= offset; 4291 ar.goal -= offset;
4277 ar.logical -= offset; 4292 ar.logical -= offset;
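
The leaf-entry loop above detects corruption by remembering where the previous extent ended: prev holds its last logical block, so any later extent whose ee_block starts at or before prev overlaps it (the && prev guard skips the very first entry). A worked example:

	/* Extent A maps logical blocks 0..9, so prev = 9 after it.
	 * A following extent with ee_block = 8 gives (8 <= 9) && 9,
	 * so the tree is declared invalid and that extent's pblk is
	 * recorded in s_last_error_block for diagnostics. */
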
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 075763474118..61d49ff22c81 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
1206 */ 1206 */
1207static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) 1207static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1208{ 1208{
1209 int retries = 0;
1210 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1209 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1211 struct ext4_inode_info *ei = EXT4_I(inode); 1210 struct ext4_inode_info *ei = EXT4_I(inode);
1212 unsigned int md_needed; 1211 unsigned int md_needed;
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1218 * in order to allocate nrblocks 1217 * in order to allocate nrblocks
1219 * worse case is one extent per block 1218 * worse case is one extent per block
1220 */ 1219 */
1221repeat:
1222 spin_lock(&ei->i_block_reservation_lock); 1220 spin_lock(&ei->i_block_reservation_lock);
1223 /* 1221 /*
1224 * ext4_calc_metadata_amount() has side effects, which we have 1222 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1236,6 @@ repeat:
1238 ei->i_da_metadata_calc_len = save_len; 1236 ei->i_da_metadata_calc_len = save_len;
1239 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1237 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1240 spin_unlock(&ei->i_block_reservation_lock); 1238 spin_unlock(&ei->i_block_reservation_lock);
1241 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1242 cond_resched();
1243 goto repeat;
1244 }
1245 return -ENOSPC; 1239 return -ENOSPC;
1246 } 1240 }
1247 ei->i_reserved_meta_blocks += md_needed; 1241 ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1249,6 @@ repeat:
1255 */ 1249 */
1256static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1250static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1257{ 1251{
1258 int retries = 0;
1259 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1252 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1260 struct ext4_inode_info *ei = EXT4_I(inode); 1253 struct ext4_inode_info *ei = EXT4_I(inode);
1261 unsigned int md_needed; 1254 unsigned int md_needed;
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1277 * in order to allocate nrblocks 1270 * in order to allocate nrblocks
1278 * worse case is one extent per block 1271 * worse case is one extent per block
1279 */ 1272 */
1280repeat:
1281 spin_lock(&ei->i_block_reservation_lock); 1273 spin_lock(&ei->i_block_reservation_lock);
1282 /* 1274 /*
1283 * ext4_calc_metadata_amount() has side effects, which we have 1275 * ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1289,6 @@ repeat:
1297 ei->i_da_metadata_calc_len = save_len; 1289 ei->i_da_metadata_calc_len = save_len;
1298 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1290 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1299 spin_unlock(&ei->i_block_reservation_lock); 1291 spin_unlock(&ei->i_block_reservation_lock);
1300 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1301 cond_resched();
1302 goto repeat;
1303 }
1304 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1292 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1305 return -ENOSPC; 1293 return -ENOSPC;
1306 } 1294 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113efa024c..04a5c7504be9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
3442{ 3442{
3443 struct ext4_prealloc_space *pa; 3443 struct ext4_prealloc_space *pa;
3444 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); 3444 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3445
3446 BUG_ON(atomic_read(&pa->pa_count));
3447 BUG_ON(pa->pa_deleted == 0);
3445 kmem_cache_free(ext4_pspace_cachep, pa); 3448 kmem_cache_free(ext4_pspace_cachep, pa);
3446} 3449}
3447 3450
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3455 ext4_group_t grp; 3458 ext4_group_t grp;
3456 ext4_fsblk_t grp_blk; 3459 ext4_fsblk_t grp_blk;
3457 3460
3458 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3459 return;
3460
3461 /* in this short window concurrent discard can set pa_deleted */ 3461 /* in this short window concurrent discard can set pa_deleted */
3462 spin_lock(&pa->pa_lock); 3462 spin_lock(&pa->pa_lock);
3463 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3464 spin_unlock(&pa->pa_lock);
3465 return;
3466 }
3467
3463 if (pa->pa_deleted == 1) { 3468 if (pa->pa_deleted == 1) {
3464 spin_unlock(&pa->pa_lock); 3469 spin_unlock(&pa->pa_lock);
3465 return; 3470 return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4121 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4126 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4122 4127
4123 /* set up allocation goals */ 4128 /* set up allocation goals */
4124 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); 4129 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4125 ac->ac_status = AC_STATUS_CONTINUE; 4130 ac->ac_status = AC_STATUS_CONTINUE;
4126 ac->ac_sb = sb; 4131 ac->ac_sb = sb;
4127 ac->ac_inode = ar->inode; 4132 ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4663 * blocks at the beginning or the end unless we are explicitly 4668 * blocks at the beginning or the end unless we are explicitly
4664 * requested to avoid doing so. 4669 * requested to avoid doing so.
4665 */ 4670 */
4666 overflow = block & (sbi->s_cluster_ratio - 1); 4671 overflow = EXT4_PBLK_COFF(sbi, block);
4667 if (overflow) { 4672 if (overflow) {
4668 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { 4673 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4669 overflow = sbi->s_cluster_ratio - overflow; 4674 overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4677 count += overflow; 4682 count += overflow;
4678 } 4683 }
4679 } 4684 }
4680 overflow = count & (sbi->s_cluster_ratio - 1); 4685 overflow = EXT4_LBLK_COFF(sbi, count);
4681 if (overflow) { 4686 if (overflow) {
4682 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { 4687 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4683 if (count > overflow) 4688 if (count > overflow)
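
The ext4_mb_put_pa() change above closes a race: the final atomic_dec_and_test() used to run before pa_lock was taken, so a concurrent group discard could see pa_count reach zero and free the preallocation while this path was still about to inspect it; the new BUG_ON()s in the RCU callback assert the resulting invariants. The shape of the fix is to decide "last reference?" under the same lock the deleter uses, roughly as in this hedged sketch with hypothetical names:

	/* Drop the reference under obj->lock so the last-ref test and the
	 * deleted check are atomic with respect to a concurrent deleter. */
	spin_lock(&obj->lock);
	if (!atomic_dec_and_test(&obj->refcount) || obj->still_in_use) {
		spin_unlock(&obj->lock);
		return;			/* not the last user */
	}
	if (obj->deleted) {		/* deleter already won */
		spin_unlock(&obj->lock);
		return;
	}
	obj->deleted = 1;
	spin_unlock(&obj->lock);
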
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e4e63b..1f7784de05b6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
792 } 792 }
793 793
794 ext4_es_unregister_shrinker(sbi); 794 ext4_es_unregister_shrinker(sbi);
795 del_timer(&sbi->s_err_report); 795 del_timer_sync(&sbi->s_err_report);
796 ext4_release_system_zone(sb); 796 ext4_release_system_zone(sb);
797 ext4_mb_release(sb); 797 ext4_mb_release(sb);
798 ext4_ext_release(sb); 798 ext4_ext_release(sb);
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb)
3316} 3316}
3317 3317
3318 3318
3319static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) 3319static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3320{ 3320{
3321 ext4_fsblk_t resv_clusters; 3321 ext4_fsblk_t resv_clusters;
3322 3322
3323 /* 3323 /*
3324 * There's no need to reserve anything when we aren't using extents.
3325 * The space estimates are exact, there are no unwritten extents,
3326 * hole punching doesn't need new metadata... This is needed especially
3327 * to keep ext2/3 backward compatibility.
3328 */
3329 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
3330 return 0;
3331 /*
3324 * By default we reserve 2% or 4096 clusters, whichever is smaller. 3332 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3325 * This should cover the situations where we can not afford to run 3333 * This should cover the situations where we can not afford to run
3326 * out of space like for example punch hole, or converting 3334 * out of space like for example punch hole, or converting
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
3328 * allocation would require 1, or 2 blocks, higher numbers are 3336 * allocation would require 1, or 2 blocks, higher numbers are
3329 * very rare. 3337 * very rare.
3330 */ 3338 */
3331 resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; 3339 resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
3340 EXT4_SB(sb)->s_cluster_bits;
3332 3341
3333 do_div(resv_clusters, 50); 3342 do_div(resv_clusters, 50);
3334 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); 3343 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -4071,10 +4080,10 @@ no_journal:
4071 "available"); 4080 "available");
4072 } 4081 }
4073 4082
4074 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); 4083 err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
4075 if (err) { 4084 if (err) {
4076 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4085 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4077 "reserved pool", ext4_calculate_resv_clusters(sbi)); 4086 "reserved pool", ext4_calculate_resv_clusters(sb));
4078 goto failed_mount4a; 4087 goto failed_mount4a;
4079 } 4088 }
4080 4089
@@ -4184,7 +4193,7 @@ failed_mount_wq:
4184 } 4193 }
4185failed_mount3: 4194failed_mount3:
4186 ext4_es_unregister_shrinker(sbi); 4195 ext4_es_unregister_shrinker(sbi);
4187 del_timer(&sbi->s_err_report); 4196 del_timer_sync(&sbi->s_err_report);
4188 if (sbi->s_flex_groups) 4197 if (sbi->s_flex_groups)
4189 ext4_kvfree(sbi->s_flex_groups); 4198 ext4_kvfree(sbi->s_flex_groups);
4190 percpu_counter_destroy(&sbi->s_freeclusters_counter); 4199 percpu_counter_destroy(&sbi->s_freeclusters_counter);
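
ext4_calculate_resv_clusters() above reserves 2% of the filesystem or 4096 clusters, whichever is smaller, and the patch skips the reservation entirely when the extents feature is absent, preserving ext2/3 behaviour where space estimates are exact. Worked numbers for a 1 TiB filesystem with 4 KiB blocks and no bigalloc (cluster ratio 1):

	/* clusters  = 2^40 / 2^12 = 268435456
	 * 2 percent = 268435456 / 50 = 5368709
	 * reserved  = min(5368709, 4096) = 4096 clusters (16 MiB) */
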
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index b51a6079108d..e9a97a0d4314 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -24,13 +24,6 @@ struct hfsplus_wd {
24 u16 embed_count; 24 u16 embed_count;
25}; 25};
26 26
27static void hfsplus_end_io_sync(struct bio *bio, int err)
28{
29 if (err)
30 clear_bit(BIO_UPTODATE, &bio->bi_flags);
31 complete(bio->bi_private);
32}
33
34/* 27/*
35 * hfsplus_submit_bio - Perform block I/O 28
36 * @sb: super block of volume for I/O 29 * @sb: super block of volume for I/O
@@ -53,7 +46,6 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
53int hfsplus_submit_bio(struct super_block *sb, sector_t sector, 46int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
54 void *buf, void **data, int rw) 47 void *buf, void **data, int rw)
55{ 48{
56 DECLARE_COMPLETION_ONSTACK(wait);
57 struct bio *bio; 49 struct bio *bio;
58 int ret = 0; 50 int ret = 0;
59 u64 io_size; 51 u64 io_size;
@@ -73,8 +65,6 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
73 bio = bio_alloc(GFP_NOIO, 1); 65 bio = bio_alloc(GFP_NOIO, 1);
74 bio->bi_sector = sector; 66 bio->bi_sector = sector;
75 bio->bi_bdev = sb->s_bdev; 67 bio->bi_bdev = sb->s_bdev;
76 bio->bi_end_io = hfsplus_end_io_sync;
77 bio->bi_private = &wait;
78 68
79 if (!(rw & WRITE) && data) 69 if (!(rw & WRITE) && data)
80 *data = (u8 *)buf + offset; 70 *data = (u8 *)buf + offset;
@@ -93,12 +83,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
93 buf = (u8 *)buf + len; 83 buf = (u8 *)buf + len;
94 } 84 }
95 85
96 submit_bio(rw, bio); 86 ret = submit_bio_wait(rw, bio);
97 wait_for_completion(&wait);
98
99 if (!bio_flagged(bio, BIO_UPTODATE))
100 ret = -EIO;
101
102out: 87out:
103 bio_put(bio); 88 bio_put(bio);
104 return ret < 0 ? ret : 0; 89 return ret < 0 ? ret : 0;
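
The hfsplus conversion above is the whole point of submit_bio_wait(): it supplies the on-stack completion and end_io handler callers used to open-code, submits the bio, sleeps until it completes, and returns 0 or a negative errno. A minimal synchronous read in the same 3.13-era block API (bi_sector, two-argument submit_bio_wait), as a sketch:

	/* Read one 512-byte sector synchronously. */
	static int read_sector_sync(struct block_device *bdev, sector_t sector,
				    struct page *page)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);
		int ret;

		if (!bio)
			return -ENOMEM;
		bio->bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_add_page(bio, page, 512, 0);
		ret = submit_bio_wait(READ, bio);	/* sleeps until done */
		bio_put(bio);
		return ret;
	}
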
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 52032647dd4a..5fa344afb49a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
702 read_lock(&journal->j_state_lock); 702 read_lock(&journal->j_state_lock);
703#ifdef CONFIG_JBD2_DEBUG 703#ifdef CONFIG_JBD2_DEBUG
704 if (!tid_geq(journal->j_commit_request, tid)) { 704 if (!tid_geq(journal->j_commit_request, tid)) {
705 printk(KERN_EMERG 705 printk(KERN_ERR
706 "%s: error: j_commit_request=%d, tid=%d\n", 706 "%s: error: j_commit_request=%d, tid=%d\n",
707 __func__, journal->j_commit_request, tid); 707 __func__, journal->j_commit_request, tid);
708 } 708 }
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
718 } 718 }
719 read_unlock(&journal->j_state_lock); 719 read_unlock(&journal->j_state_lock);
720 720
721 if (unlikely(is_journal_aborted(journal))) { 721 if (unlikely(is_journal_aborted(journal)))
722 printk(KERN_EMERG "journal commit I/O error\n");
723 err = -EIO; 722 err = -EIO;
724 }
725 return err; 723 return err;
726} 724}
727 725
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
1527 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1525 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
1528 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
1529 /* Can't have checksum v1 and v2 on at the same time! */ 1527 /* Can't have checksum v1 and v2 on at the same time! */
1530 printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1531 "at the same time!\n"); 1529 "at the same time!\n");
1532 goto out; 1530 goto out;
1533 } 1531 }
1534 1532
1535 if (!jbd2_verify_csum_type(journal, sb)) { 1533 if (!jbd2_verify_csum_type(journal, sb)) {
1536 printk(KERN_ERR "JBD: Unknown checksum type\n"); 1534 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1537 goto out; 1535 goto out;
1538 } 1536 }
1539 1537
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
1541 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1539 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
1542 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1543 if (IS_ERR(journal->j_chksum_driver)) { 1541 if (IS_ERR(journal->j_chksum_driver)) {
1544 printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); 1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
1545 err = PTR_ERR(journal->j_chksum_driver); 1543 err = PTR_ERR(journal->j_chksum_driver);
1546 journal->j_chksum_driver = NULL; 1544 journal->j_chksum_driver = NULL;
1547 goto out; 1545 goto out;
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
1550 1548
1551 /* Check superblock checksum */ 1549 /* Check superblock checksum */
1552 if (!jbd2_superblock_csum_verify(journal, sb)) { 1550 if (!jbd2_superblock_csum_verify(journal, sb)) {
1553 printk(KERN_ERR "JBD: journal checksum error\n"); 1551 printk(KERN_ERR "JBD2: journal checksum error\n");
1554 goto out; 1552 goto out;
1555 } 1553 }
1556 1554
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1836 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 1834 journal->j_chksum_driver = crypto_alloc_shash("crc32c",
1837 0, 0); 1835 0, 0);
1838 if (IS_ERR(journal->j_chksum_driver)) { 1836 if (IS_ERR(journal->j_chksum_driver)) {
1839 printk(KERN_ERR "JBD: Cannot load crc32c " 1837 printk(KERN_ERR "JBD2: Cannot load crc32c "
1840 "driver.\n"); 1838 "driver.\n");
1841 journal->j_chksum_driver = NULL; 1839 journal->j_chksum_driver = NULL;
1842 return 0; 1840 return 0;
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
2645#ifdef CONFIG_JBD2_DEBUG 2643#ifdef CONFIG_JBD2_DEBUG
2646 int n = atomic_read(&nr_journal_heads); 2644 int n = atomic_read(&nr_journal_heads);
2647 if (n) 2645 if (n)
2648 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); 2646 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
2649#endif 2647#endif
2650 jbd2_remove_jbd_stats_proc_entry(); 2648 jbd2_remove_jbd_stats_proc_entry();
2651 jbd2_journal_destroy_caches(); 2649 jbd2_journal_destroy_caches();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3929c50428b1..3b6bb19d60b1 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
594 be32_to_cpu(tmp->h_sequence))) { 594 be32_to_cpu(tmp->h_sequence))) {
595 brelse(obh); 595 brelse(obh);
596 success = -EIO; 596 success = -EIO;
597 printk(KERN_ERR "JBD: Invalid " 597 printk(KERN_ERR "JBD2: Invalid "
598 "checksum recovering " 598 "checksum recovering "
599 "block %llu in log\n", 599 "block %llu in log\n",
600 blocknr); 600 blocknr);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32573bb..8360674c85bc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -932,7 +932,7 @@ repeat:
932 jbd2_alloc(jh2bh(jh)->b_size, 932 jbd2_alloc(jh2bh(jh)->b_size,
933 GFP_NOFS); 933 GFP_NOFS);
934 if (!frozen_buffer) { 934 if (!frozen_buffer) {
935 printk(KERN_EMERG 935 printk(KERN_ERR
936 "%s: OOM for frozen_buffer\n", 936 "%s: OOM for frozen_buffer\n",
937 __func__); 937 __func__);
938 JBUFFER_TRACE(jh, "oom!"); 938 JBUFFER_TRACE(jh, "oom!");
@@ -1166,7 +1166,7 @@ repeat:
1166 if (!jh->b_committed_data) { 1166 if (!jh->b_committed_data) {
1167 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 1167 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
1168 if (!committed_data) { 1168 if (!committed_data) {
1169 printk(KERN_EMERG "%s: No memory for committed data\n", 1169 printk(KERN_ERR "%s: No memory for committed data\n",
1170 __func__); 1170 __func__);
1171 err = -ENOMEM; 1171 err = -ENOMEM;
1172 goto out; 1172 goto out;
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1290 * once a transaction -bzzz 1290 * once a transaction -bzzz
1291 */ 1291 */
1292 jh->b_modified = 1; 1292 jh->b_modified = 1;
1293 J_ASSERT_JH(jh, handle->h_buffer_credits > 0); 1293 if (handle->h_buffer_credits <= 0) {
1294 ret = -ENOSPC;
1295 goto out_unlock_bh;
1296 }
1294 handle->h_buffer_credits--; 1297 handle->h_buffer_credits--;
1295 } 1298 }
1296 1299
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1305 JBUFFER_TRACE(jh, "fastpath"); 1308 JBUFFER_TRACE(jh, "fastpath");
1306 if (unlikely(jh->b_transaction != 1309 if (unlikely(jh->b_transaction !=
1307 journal->j_running_transaction)) { 1310 journal->j_running_transaction)) {
1308 printk(KERN_EMERG "JBD: %s: " 1311 printk(KERN_ERR "JBD2: %s: "
1309 "jh->b_transaction (%llu, %p, %u) != " 1312 "jh->b_transaction (%llu, %p, %u) != "
1310 "journal->j_running_transaction (%p, %u)", 1313 "journal->j_running_transaction (%p, %u)",
1311 journal->j_devname, 1314 journal->j_devname,
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1332 JBUFFER_TRACE(jh, "already on other transaction"); 1335 JBUFFER_TRACE(jh, "already on other transaction");
1333 if (unlikely(jh->b_transaction != 1336 if (unlikely(jh->b_transaction !=
1334 journal->j_committing_transaction)) { 1337 journal->j_committing_transaction)) {
1335 printk(KERN_EMERG "JBD: %s: " 1338 printk(KERN_ERR "JBD2: %s: "
1336 "jh->b_transaction (%llu, %p, %u) != " 1339 "jh->b_transaction (%llu, %p, %u) != "
1337 "journal->j_committing_transaction (%p, %u)", 1340 "journal->j_committing_transaction (%p, %u)",
1338 journal->j_devname, 1341 journal->j_devname,
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1345 ret = -EINVAL; 1348 ret = -EINVAL;
1346 } 1349 }
1347 if (unlikely(jh->b_next_transaction != transaction)) { 1350 if (unlikely(jh->b_next_transaction != transaction)) {
1348 printk(KERN_EMERG "JBD: %s: " 1351 printk(KERN_ERR "JBD2: %s: "
1349 "jh->b_next_transaction (%llu, %p, %u) != " 1352 "jh->b_next_transaction (%llu, %p, %u) != "
1350 "transaction (%p, %u)", 1353 "transaction (%p, %u)",
1351 journal->j_devname, 1354 journal->j_devname,
@@ -1373,7 +1376,6 @@ out_unlock_bh:
1373 jbd2_journal_put_journal_head(jh); 1376 jbd2_journal_put_journal_head(jh);
1374out: 1377out:
1375 JBUFFER_TRACE(jh, "exit"); 1378 JBUFFER_TRACE(jh, "exit");
1376 WARN_ON(ret); /* All errors are bugs, so dump the stack */
1377 return ret; 1379 return ret;
1378} 1380}
1379 1381
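
Note on the jbd2/transaction.c hunks: a fatal assertion becomes a recoverable error. A handle that has run out of buffer credits now gets -ENOSPC instead of tripping J_ASSERT_JH(), and the unconditional WARN_ON(ret) on the exit path is dropped since errors are no longer treated as bugs. A minimal caller-side sketch, assuming an ext4-style journalling wrapper (jbd2_journal_abort_handle() is a real jbd2 helper; the surrounding flow is illustrative):

    int err = jbd2_journal_dirty_metadata(handle, bh);
    if (err) {
        /* credits exhausted (-ENOSPC) or transaction mismatch
         * (-EINVAL): abort the handle instead of BUGing out */
        jbd2_journal_abort_handle(handle);
    }
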
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 550475ca6a0e..0f95f0d0b313 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -14,16 +14,10 @@
14 14
15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
16 16
17static void request_complete(struct bio *bio, int err)
18{
19 complete((struct completion *)bio->bi_private);
20}
21
22static int sync_request(struct page *page, struct block_device *bdev, int rw) 17static int sync_request(struct page *page, struct block_device *bdev, int rw)
23{ 18{
24 struct bio bio; 19 struct bio bio;
25 struct bio_vec bio_vec; 20 struct bio_vec bio_vec;
26 struct completion complete;
27 21
28 bio_init(&bio); 22 bio_init(&bio);
29 bio.bi_max_vecs = 1; 23 bio.bi_max_vecs = 1;
@@ -35,13 +29,8 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
35 bio.bi_size = PAGE_SIZE; 29 bio.bi_size = PAGE_SIZE;
36 bio.bi_bdev = bdev; 30 bio.bi_bdev = bdev;
37 bio.bi_sector = page->index * (PAGE_SIZE >> 9); 31 bio.bi_sector = page->index * (PAGE_SIZE >> 9);
38 init_completion(&complete);
39 bio.bi_private = &complete;
40 bio.bi_end_io = request_complete;
41 32
42 submit_bio(rw, &bio); 33 return submit_bio_wait(rw, &bio);
43 wait_for_completion(&complete);
44 return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
45} 34}
46 35
47static int bdev_readpage(void *_sb, struct page *page) 36static int bdev_readpage(void *_sb, struct page *page)
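
The logfs conversion above is the generic pattern for dropping open-coded completion plumbing: submit_bio_wait() supplies its own bi_end_io/completion pair and returns 0 or a negative errno. A self-contained sketch of a synchronous one-page read on the bio API of this era (bi_sector/bi_size fields, as in the diff):

    #include <linux/bio.h>

    static int read_page_sync(struct block_device *bdev, struct page *page)
    {
        struct bio bio;
        struct bio_vec vec;

        bio_init(&bio);
        bio.bi_max_vecs = 1;
        bio.bi_io_vec = &vec;
        vec.bv_page = page;
        vec.bv_len = PAGE_SIZE;
        vec.bv_offset = 0;
        bio.bi_vcnt = 1;
        bio.bi_size = PAGE_SIZE;
        bio.bi_bdev = bdev;
        bio.bi_sector = page->index * (PAGE_SIZE >> 9);

        /* blocks until the bio completes; returns 0 or -EIO */
        return submit_bio_wait(READ, &bio);
    }
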
diff --git a/fs/namei.c b/fs/namei.c
index 8f77a8cea289..3531deebad30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -513,8 +513,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
513 513
514 if (!lockref_get_not_dead(&parent->d_lockref)) { 514 if (!lockref_get_not_dead(&parent->d_lockref)) {
515 nd->path.dentry = NULL; 515 nd->path.dentry = NULL;
516 rcu_read_unlock(); 516 goto out;
517 return -ECHILD;
518 } 517 }
519 518
520 /* 519 /*
@@ -1599,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1599 * do a "get_unaligned()" if this helps and is sufficiently 1598 * do a "get_unaligned()" if this helps and is sufficiently
1600 * fast. 1599 * fast.
1601 * 1600 *
1602 * - Little-endian machines (so that we can generate the mask
1603 * of low bytes efficiently). Again, we *could* do a byte
1604 * swapping load on big-endian architectures if that is not
1605 * expensive enough to make the optimization worthless.
1606 *
1607 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we 1601 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1608 * do not trap on the (extremely unlikely) case of a page 1602 * do not trap on the (extremely unlikely) case of a page
1609 * crossing operation. 1603 * crossing operation.
@@ -1647,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
1647 if (!len) 1641 if (!len)
1648 goto done; 1642 goto done;
1649 } 1643 }
1650 mask = ~(~0ul << len*8); 1644 mask = bytemask_from_count(len);
1651 hash += mask & a; 1645 hash += mask & a;
1652done: 1646done:
1653 return fold_hash(hash); 1647 return fold_hash(hash);
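
bytemask_from_count() centralizes the open-coded ~(~0ul << len*8) trick, which is also why the little-endian caveat can leave this comment block: the endianness concern now lives wherever the macro is defined (alongside the word-at-a-time helpers; exact header assumed here). A sketch of the little-endian form with a worked value:

    /* little-endian definition, equivalent to the old open-coded mask */
    #define bytemask_from_count(cnt)    (~(~0ul << (cnt)*8))

    /* worked example on 64-bit LE: keeps only the low three bytes of a
     * partially loaded word */
    unsigned long mask = bytemask_from_count(3);  /* 0x0000000000ffffff */
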
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 8485978993e8..9838fb020473 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,6 +36,7 @@
36#include <linux/nfs_fs.h> 36#include <linux/nfs_fs.h>
37#include <linux/sunrpc/rpc_pipe_fs.h> 37#include <linux/sunrpc/rpc_pipe_fs.h>
38 38
39#include "../nfs4_fs.h"
39#include "../pnfs.h" 40#include "../pnfs.h"
40#include "../netns.h" 41#include "../netns.h"
41 42
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed1..4d0161442565 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
44static inline sector_t normalize(sector_t s, int base) 44static inline sector_t normalize(sector_t s, int base)
45{ 45{
46 sector_t tmp = s; /* Since do_div modifies its argument */ 46 sector_t tmp = s; /* Since do_div modifies its argument */
47 return s - do_div(tmp, base); 47 return s - sector_div(tmp, base);
48} 48}
49 49
50static inline sector_t normalize_up(sector_t s, int base) 50static inline sector_t normalize_up(sector_t s, int base)
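
The do_div() to sector_div() switch matters because sector_t is only 64 bits wide when CONFIG_LBDAF is enabled; on 32-bit configurations without it, handing a 32-bit sector_t to do_div(), which requires a u64 lvalue, is wrong. sector_div() compiles to the correct division either way. Consolidated view of the helper after this hunk:

    static inline sector_t normalize(sector_t s, int base)
    {
        sector_t tmp = s;    /* sector_div() modifies its argument */

        /* sector_div() returns the remainder, so subtracting it
         * rounds s down to a multiple of base at any sector_t width */
        return s - sector_div(tmp, base);
    }
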
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index fc0f95ec7358..d25f10fb4926 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
46#include <linux/sunrpc/cache.h> 46#include <linux/sunrpc/cache.h>
47#include <linux/sunrpc/svcauth.h> 47#include <linux/sunrpc/svcauth.h>
48#include <linux/sunrpc/rpc_pipe_fs.h> 48#include <linux/sunrpc/rpc_pipe_fs.h>
49#include <linux/nfs_fs.h>
49 50
51#include "nfs4_fs.h"
50#include "dns_resolve.h" 52#include "dns_resolve.h"
51#include "cache_lib.h" 53#include "cache_lib.h"
52#include "netns.h" 54#include "netns.h"
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 18ab2da4eeb6..00ad1c2b217d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -312,7 +312,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
312} 312}
313EXPORT_SYMBOL_GPL(nfs4_label_alloc); 313EXPORT_SYMBOL_GPL(nfs4_label_alloc);
314#else 314#else
315void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, 315void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
316 struct nfs4_label *label) 316 struct nfs4_label *label)
317{ 317{
318} 318}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bca6a3e3c49c..8b5cc04a8611 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
269extern struct rpc_procinfo nfs4_procedures[]; 269extern struct rpc_procinfo nfs4_procedures[];
270#endif 270#endif
271 271
272#ifdef CONFIG_NFS_V4_SECURITY_LABEL
273extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
274static inline void nfs4_label_free(struct nfs4_label *label)
275{
276 if (label) {
277 kfree(label->label);
278 kfree(label);
279 }
280 return;
281}
282#else
283static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
284static inline void nfs4_label_free(void *label) {}
285#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
286
272/* proc.c */ 287/* proc.c */
273void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 288void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
274extern struct nfs_client *nfs_init_client(struct nfs_client *clp, 289extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
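
The new label helpers follow the usual CONFIG-stub pattern: with CONFIG_NFS_V4_SECURITY_LABEL disabled they collapse to a NULL-returning allocator and a no-op free, so callers stay ifdef-free. A usage sketch, assuming the real allocator returns an ERR_PTR on failure as in fs/nfs/inode.c (a NULL label from the stub simply means labels are unsupported):

    struct nfs4_label *label;

    label = nfs4_label_alloc(server, GFP_KERNEL);
    if (IS_ERR(label))
        return PTR_ERR(label);
    /* ... populate and use label->label / label->len ... */
    nfs4_label_free(label);    /* frees both the buffer and the struct */
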
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3ce79b04522e..5609edc742a0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,6 +9,14 @@
9#ifndef __LINUX_FS_NFS_NFS4_FS_H 9#ifndef __LINUX_FS_NFS_NFS4_FS_H
10#define __LINUX_FS_NFS_NFS4_FS_H 10#define __LINUX_FS_NFS_NFS4_FS_H
11 11
12#if defined(CONFIG_NFS_V4_2)
13#define NFS4_MAX_MINOR_VERSION 2
14#elif defined(CONFIG_NFS_V4_1)
15#define NFS4_MAX_MINOR_VERSION 1
16#else
17#define NFS4_MAX_MINOR_VERSION 0
18#endif
19
12#if IS_ENABLED(CONFIG_NFS_V4) 20#if IS_ENABLED(CONFIG_NFS_V4)
13 21
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 22#define NFS4_MAX_LOOP_ON_RECOVER (10)
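
NFS4_MAX_MINOR_VERSION gives the build-time ceiling on the NFSv4 minor version, derived from which CONFIG_NFS_V4_x options are enabled. A sketch of the intended guard at mount time (the surrounding caller and its args structure are assumed):

    if (args->minorversion > NFS4_MAX_MINOR_VERSION)
        return -EPROTONOSUPPORT;    /* kernel not built for this v4.x */
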
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 659990c0109e..15052b81df42 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2518 calldata->roc_barrier); 2518 calldata->roc_barrier);
2519 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 2519 nfs_set_open_stateid(state, &calldata->res.stateid, 0);
2520 renew_lease(server, calldata->timestamp); 2520 renew_lease(server, calldata->timestamp);
2521 nfs4_close_clear_stateid_flags(state,
2522 calldata->arg.fmode);
2523 break; 2521 break;
2522 case -NFS4ERR_ADMIN_REVOKED:
2524 case -NFS4ERR_STALE_STATEID: 2523 case -NFS4ERR_STALE_STATEID:
2525 case -NFS4ERR_OLD_STATEID: 2524 case -NFS4ERR_OLD_STATEID:
2526 case -NFS4ERR_BAD_STATEID: 2525 case -NFS4ERR_BAD_STATEID:
@@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2528 if (calldata->arg.fmode == 0) 2527 if (calldata->arg.fmode == 0)
2529 break; 2528 break;
2530 default: 2529 default:
2531 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) 2530 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
2532 rpc_restart_call_prepare(task); 2531 rpc_restart_call_prepare(task);
2532 goto out_release;
2533 }
2533 } 2534 }
2535 nfs4_close_clear_stateid_flags(state, calldata->arg.fmode);
2536out_release:
2534 nfs_release_seqid(calldata->arg.seqid); 2537 nfs_release_seqid(calldata->arg.seqid);
2535 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2538 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
2536 dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); 2539 dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
@@ -4802,7 +4805,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
4802 dprintk("%s ERROR %d, Reset session\n", __func__, 4805 dprintk("%s ERROR %d, Reset session\n", __func__,
4803 task->tk_status); 4806 task->tk_status);
4804 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); 4807 nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
4805 goto restart_call; 4808 goto wait_on_recovery;
4806#endif /* CONFIG_NFS_V4_1 */ 4809#endif /* CONFIG_NFS_V4_1 */
4807 case -NFS4ERR_DELAY: 4810 case -NFS4ERR_DELAY:
4808 nfs_inc_server_stats(server, NFSIOS_DELAY); 4811 nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4987,11 +4990,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4987 4990
4988 trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); 4991 trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
4989 switch (task->tk_status) { 4992 switch (task->tk_status) {
4990 case -NFS4ERR_STALE_STATEID:
4991 case -NFS4ERR_EXPIRED:
4992 case 0: 4993 case 0:
4993 renew_lease(data->res.server, data->timestamp); 4994 renew_lease(data->res.server, data->timestamp);
4994 break; 4995 break;
4996 case -NFS4ERR_ADMIN_REVOKED:
4997 case -NFS4ERR_DELEG_REVOKED:
4998 case -NFS4ERR_BAD_STATEID:
4999 case -NFS4ERR_OLD_STATEID:
5000 case -NFS4ERR_STALE_STATEID:
5001 case -NFS4ERR_EXPIRED:
5002 task->tk_status = 0;
5003 break;
4995 default: 5004 default:
4996 if (nfs4_async_handle_error(task, data->res.server, NULL) == 5005 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
4997 -EAGAIN) { 5006 -EAGAIN) {
@@ -7589,7 +7598,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
7589 return; 7598 return;
7590 7599
7591 server = NFS_SERVER(lrp->args.inode); 7600 server = NFS_SERVER(lrp->args.inode);
7592 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { 7601 switch (task->tk_status) {
7602 default:
7603 task->tk_status = 0;
7604 case 0:
7605 break;
7606 case -NFS4ERR_DELAY:
7607 if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
7608 break;
7593 rpc_restart_call_prepare(task); 7609 rpc_restart_call_prepare(task);
7594 return; 7610 return;
7595 } 7611 }
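
The nfs4proc.c hunks tighten three completion handlers: CLOSE now clears the open-mode state flags on every outcome (via the new out_release label) and treats ADMIN_REVOKED like the other bad-stateid cases, DELEGRETURN swallows revoked/stale stateid errors since there is nothing left to return, and LAYOUTRETURN keeps only DELAY as a retriable error. Note the deliberate fallthrough in the last one; a logically equivalent sketch:

    switch (task->tk_status) {
    default:
        task->tk_status = 0;    /* squash unknown errors... */
    case 0:                     /* ...and fall through to done */
        break;
    case -NFS4ERR_DELAY:
        if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
            rpc_restart_call_prepare(task);
            return;
        }
        break;
    }
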
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 9186c7ce0b14..b6af150c96b8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,6 +132,13 @@ nfsd_reply_cache_alloc(void)
132} 132}
133 133
134static void 134static void
135nfsd_reply_cache_unhash(struct svc_cacherep *rp)
136{
137 hlist_del_init(&rp->c_hash);
138 list_del_init(&rp->c_lru);
139}
140
141static void
135nfsd_reply_cache_free_locked(struct svc_cacherep *rp) 142nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
136{ 143{
137 if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { 144 if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -417,7 +424,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
417 rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); 424 rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
418 if (nfsd_cache_entry_expired(rp) || 425 if (nfsd_cache_entry_expired(rp) ||
419 num_drc_entries >= max_drc_entries) { 426 num_drc_entries >= max_drc_entries) {
420 lru_put_end(rp); 427 nfsd_reply_cache_unhash(rp);
421 prune_cache_entries(); 428 prune_cache_entries();
422 goto search_cache; 429 goto search_cache;
423 } 430 }
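
Unhashing the victim entry before calling prune_cache_entries() closes a use-after-free window: the pruner walks the LRU and could free the very entry the lookup path is about to recycle (lru_put_end() merely moved it, leaving it reachable). Sketch of the recycle path after this hunk, using the names from the diff:

    rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
    if (nfsd_cache_entry_expired(rp) ||
        num_drc_entries >= max_drc_entries) {
        nfsd_reply_cache_unhash(rp);    /* detach from hash + LRU */
        prune_cache_entries();          /* can no longer free rp */
        goto search_cache;
    }
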
diff --git a/fs/pipe.c b/fs/pipe.c
index d2c45e14e6d8..0e0752ef2715 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait)
726 return mask; 726 return mask;
727} 727}
728 728
729static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
730{
731 int kill = 0;
732
733 spin_lock(&inode->i_lock);
734 if (!--pipe->files) {
735 inode->i_pipe = NULL;
736 kill = 1;
737 }
738 spin_unlock(&inode->i_lock);
739
740 if (kill)
741 free_pipe_info(pipe);
742}
743
729static int 744static int
730pipe_release(struct inode *inode, struct file *file) 745pipe_release(struct inode *inode, struct file *file)
731{ 746{
732 struct pipe_inode_info *pipe = inode->i_pipe; 747 struct pipe_inode_info *pipe = file->private_data;
733 int kill = 0;
734 748
735 __pipe_lock(pipe); 749 __pipe_lock(pipe);
736 if (file->f_mode & FMODE_READ) 750 if (file->f_mode & FMODE_READ)
@@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file)
743 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 757 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
744 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 758 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
745 } 759 }
746 spin_lock(&inode->i_lock);
747 if (!--pipe->files) {
748 inode->i_pipe = NULL;
749 kill = 1;
750 }
751 spin_unlock(&inode->i_lock);
752 __pipe_unlock(pipe); 760 __pipe_unlock(pipe);
753 761
754 if (kill) 762 put_pipe_info(inode, pipe);
755 free_pipe_info(pipe);
756
757 return 0; 763 return 0;
758} 764}
759 765
@@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
1014{ 1020{
1015 struct pipe_inode_info *pipe; 1021 struct pipe_inode_info *pipe;
1016 bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; 1022 bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
1017 int kill = 0;
1018 int ret; 1023 int ret;
1019 1024
1020 filp->f_version = 0; 1025 filp->f_version = 0;
@@ -1130,15 +1135,9 @@ err_wr:
1130 goto err; 1135 goto err;
1131 1136
1132err: 1137err:
1133 spin_lock(&inode->i_lock);
1134 if (!--pipe->files) {
1135 inode->i_pipe = NULL;
1136 kill = 1;
1137 }
1138 spin_unlock(&inode->i_lock);
1139 __pipe_unlock(pipe); 1138 __pipe_unlock(pipe);
1140 if (kill) 1139
1141 free_pipe_info(pipe); 1140 put_pipe_info(inode, pipe);
1142 return ret; 1141 return ret;
1143} 1142}
1144 1143
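
put_pipe_info() folds the i_lock-protected pipe->files drop that pipe_release() and fifo_open()'s error path previously duplicated. One detail worth noting: it is called after __pipe_unlock(), because free_pipe_info() frees the pipe, mutex included, and must not run while that mutex is held. Call pattern sketch:

    __pipe_unlock(pipe);           /* drop pipe->mutex first */
    put_pipe_info(inode, pipe);    /* may free pipe when files hits 0 */
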
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 28955d4b7218..124fc43c7090 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -292,16 +292,20 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
292{ 292{
293 struct proc_dir_entry *pde = PDE(file_inode(file)); 293 struct proc_dir_entry *pde = PDE(file_inode(file));
294 unsigned long rv = -EIO; 294 unsigned long rv = -EIO;
295 unsigned long (*get_area)(struct file *, unsigned long, unsigned long, 295
296 unsigned long, unsigned long) = NULL;
297 if (use_pde(pde)) { 296 if (use_pde(pde)) {
297 typeof(proc_reg_get_unmapped_area) *get_area;
298
299 get_area = pde->proc_fops->get_unmapped_area;
298#ifdef CONFIG_MMU 300#ifdef CONFIG_MMU
299 get_area = current->mm->get_unmapped_area; 301 if (!get_area)
302 get_area = current->mm->get_unmapped_area;
300#endif 303#endif
301 if (pde->proc_fops->get_unmapped_area) 304
302 get_area = pde->proc_fops->get_unmapped_area;
303 if (get_area) 305 if (get_area)
304 rv = get_area(file, orig_addr, len, pgoff, flags); 306 rv = get_area(file, orig_addr, len, pgoff, flags);
307 else
308 rv = orig_addr;
305 unuse_pde(pde); 309 unuse_pde(pde);
306 } 310 }
307 return rv; 311 return rv;
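
The rewrite fixes both the hook ordering and the !MMU case: a proc entry's own get_unmapped_area now takes precedence, the mm fallback exists only on CONFIG_MMU kernels, and with neither available the requested address is passed through instead of failing with -EIO. Condensed sketch of the selection, as implemented above:

    typeof(proc_reg_get_unmapped_area) *get_area;

    get_area = pde->proc_fops->get_unmapped_area;
    #ifdef CONFIG_MMU
    if (!get_area)
        get_area = current->mm->get_unmapped_area;
    #endif
    rv = get_area ? get_area(file, orig_addr, len, pgoff, flags)
                  : orig_addr;    /* no-MMU: pass the address through */
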
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d3..78c3c2097787 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
443 pstore_get_records(0); 443 pstore_get_records(0);
444 444
445 kmsg_dump_register(&pstore_dumper); 445 kmsg_dump_register(&pstore_dumper);
446 pstore_register_console(); 446
447 pstore_register_ftrace(); 447 if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
448 pstore_register_console();
449 pstore_register_ftrace();
450 }
448 451
449 if (pstore_update_ms >= 0) { 452 if (pstore_update_ms >= 0) {
450 pstore_timer.expires = jiffies + 453 pstore_timer.expires = jiffies +
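
PSTORE_FLAGS_FRAGILE lets a backend opt out of the high-frequency console and ftrace front ends while still receiving panic/oops dumps through kmsg_dump. A hypothetical backend registration using it (the flags field is what this hunk tests; the instance below is illustrative and its callbacks are omitted):

    static struct pstore_info fragile_psi = {
        .owner = THIS_MODULE,
        .name  = "example",
        .flags = PSTORE_FLAGS_FRAGILE,  /* dumps only; no console/ftrace */
        /* .open/.read/.write/.erase callbacks omitted in this sketch */
    };

    /* console and ftrace registration are now skipped for this psi */
    pstore_register(&fragile_psi);
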
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 2943b2bfae48..62a0de6632e1 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
84 */ 84 */
85 res = squashfs_read_cache(target_page, block, bsize, pages, 85 res = squashfs_read_cache(target_page, block, bsize, pages,
86 page); 86 page);
87 if (res < 0)
88 goto mark_errored;
89
87 goto out; 90 goto out;
88 } 91 }
89 92
@@ -119,7 +122,7 @@ mark_errored:
119 * dealt with by the caller 122 * dealt with by the caller
120 */ 123 */
121 for (i = 0; i < pages; i++) { 124 for (i = 0; i < pages; i++) {
122 if (page[i] == target_page) 125 if (page[i] == NULL || page[i] == target_page)
123 continue; 126 continue;
124 flush_dcache_page(page[i]); 127 flush_dcache_page(page[i]);
125 SetPageError(page[i]); 128 SetPageError(page[i]);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 79b5da2acbe1..35e7d08fe629 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -649,7 +649,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
649 if (!of) 649 if (!of)
650 goto err_out; 650 goto err_out;
651 651
652 mutex_init(&of->mutex); 652 /*
653 * The following is done to give a different lockdep key to
654 * @of->mutex for files which implement mmap. This is a rather
655 * crude way to avoid false positive lockdep warning around
656 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
657 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
658 * which mm->mmap_sem nests, while holding @of->mutex. As each
659 * open file has a separate mutex, it's okay as long as those don't
660 * happen on the same file. At this point, we can't easily give
661 * each file a separate locking class. Let's differentiate on
662 * whether the file is bin or not for now.
663 */
664 if (sysfs_is_bin(attr_sd))
665 mutex_init(&of->mutex);
666 else
667 mutex_init(&of->mutex);
668
653 of->sd = attr_sd; 669 of->sd = attr_sd;
654 of->file = file; 670 of->file = file;
655 671
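
The two branches above look identical, but they are not: mutex_init() is a macro that plants a distinct static lockdep key at each textual call site, so bin and non-bin files land in different lockdep classes even though the runtime initialization is the same. The upstream definition in include/linux/mutex.h shows why:

    #define mutex_init(mutex)                          \
    do {                                               \
        static struct lock_class_key __key;           \
                                                       \
        __mutex_init((mutex), #mutex, &__key);        \
    } while (0)
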
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b22e750..3b2c14b6f0fb 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent(
1635 * blocks at the end of the file which do not start at the previous data block, 1635 * blocks at the end of the file which do not start at the previous data block,
1636 * we will try to align the new blocks at stripe unit boundaries. 1636 * we will try to align the new blocks at stripe unit boundaries.
1637 * 1637 *
1638 * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be 1638 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1639 * at, or past the EOF. 1639 * at, or past the EOF.
1640 */ 1640 */
1641STATIC int 1641STATIC int
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof(
1650 bma->aeof = 0; 1650 bma->aeof = 0;
1651 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, 1651 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1652 &is_empty); 1652 &is_empty);
1653 if (error || is_empty) 1653 if (error)
1654 return error; 1654 return error;
1655 1655
1656 if (is_empty) {
1657 bma->aeof = 1;
1658 return 0;
1659 }
1660
1656 /* 1661 /*
1657 * Check if we are allocating or past the last extent, or at least into 1662
1658 * the last delayed allocated extent. 1663 * the last delayed allocated extent.
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc(
3643 int isaligned; 3648 int isaligned;
3644 int tryagain; 3649 int tryagain;
3645 int error; 3650 int error;
3651 int stripe_align;
3646 3652
3647 ASSERT(ap->length); 3653 ASSERT(ap->length);
3648 3654
3649 mp = ap->ip->i_mount; 3655 mp = ap->ip->i_mount;
3656
3657 /* stripe alignment for allocation is determined by mount parameters */
3658 stripe_align = 0;
3659 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3660 stripe_align = mp->m_swidth;
3661 else if (mp->m_dalign)
3662 stripe_align = mp->m_dalign;
3663
3650 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; 3664 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
3651 if (unlikely(align)) { 3665 if (unlikely(align)) {
3652 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3666 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc(
3655 ASSERT(!error); 3669 ASSERT(!error);
3656 ASSERT(ap->length); 3670 ASSERT(ap->length);
3657 } 3671 }
3672
3673
3658 nullfb = *ap->firstblock == NULLFSBLOCK; 3674 nullfb = *ap->firstblock == NULLFSBLOCK;
3659 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); 3675 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3660 if (nullfb) { 3676 if (nullfb) {
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc(
3730 */ 3746 */
3731 if (!ap->flist->xbf_low && ap->aeof) { 3747 if (!ap->flist->xbf_low && ap->aeof) {
3732 if (!ap->offset) { 3748 if (!ap->offset) {
3733 args.alignment = mp->m_dalign; 3749 args.alignment = stripe_align;
3734 atype = args.type; 3750 atype = args.type;
3735 isaligned = 1; 3751 isaligned = 1;
3736 /* 3752 /*
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc(
3755 * of minlen+alignment+slop doesn't go up 3771 * of minlen+alignment+slop doesn't go up
3756 * between the calls. 3772 * between the calls.
3757 */ 3773 */
3758 if (blen > mp->m_dalign && blen <= args.maxlen) 3774 if (blen > stripe_align && blen <= args.maxlen)
3759 nextminlen = blen - mp->m_dalign; 3775 nextminlen = blen - stripe_align;
3760 else 3776 else
3761 nextminlen = args.minlen; 3777 nextminlen = args.minlen;
3762 if (nextminlen + mp->m_dalign > args.minlen + 1) 3778 if (nextminlen + stripe_align > args.minlen + 1)
3763 args.minalignslop = 3779 args.minalignslop =
3764 nextminlen + mp->m_dalign - 3780 nextminlen + stripe_align -
3765 args.minlen - 1; 3781 args.minlen - 1;
3766 else 3782 else
3767 args.minalignslop = 0; 3783 args.minalignslop = 0;
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc(
3783 */ 3799 */
3784 args.type = atype; 3800 args.type = atype;
3785 args.fsbno = ap->blkno; 3801 args.fsbno = ap->blkno;
3786 args.alignment = mp->m_dalign; 3802 args.alignment = stripe_align;
3787 args.minlen = nextminlen; 3803 args.minlen = nextminlen;
3788 args.minalignslop = 0; 3804 args.minalignslop = 0;
3789 isaligned = 1; 3805 isaligned = 1;
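
The stripe_align change fixes which geometry the allocator aligns to: previously xfs_bmap_btalloc() always aligned to the stripe unit (m_dalign), even on mounts with the swalloc option, which asks for stripe-width alignment. The selection introduced above, for reference:

    /* stripe width wins when the mount requested swalloc,
     * otherwise fall back to the stripe unit */
    stripe_align = 0;
    if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
        stripe_align = mp->m_swidth;
    else if (mp->m_dalign)
        stripe_align = mp->m_dalign;
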
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..1394106ed22d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1187,7 +1187,12 @@ xfs_zero_remaining_bytes(
1187 XFS_BUF_UNWRITE(bp); 1187 XFS_BUF_UNWRITE(bp);
1188 XFS_BUF_READ(bp); 1188 XFS_BUF_READ(bp);
1189 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 1189 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
1190 xfsbdstrat(mp, bp); 1190
1191 if (XFS_FORCED_SHUTDOWN(mp)) {
1192 error = XFS_ERROR(EIO);
1193 break;
1194 }
1195 xfs_buf_iorequest(bp);
1191 error = xfs_buf_iowait(bp); 1196 error = xfs_buf_iowait(bp);
1192 if (error) { 1197 if (error) {
1193 xfs_buf_ioerror_alert(bp, 1198 xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1205,12 @@ xfs_zero_remaining_bytes(
1200 XFS_BUF_UNDONE(bp); 1205 XFS_BUF_UNDONE(bp);
1201 XFS_BUF_UNREAD(bp); 1206 XFS_BUF_UNREAD(bp);
1202 XFS_BUF_WRITE(bp); 1207 XFS_BUF_WRITE(bp);
1203 xfsbdstrat(mp, bp); 1208
1209 if (XFS_FORCED_SHUTDOWN(mp)) {
1210 error = XFS_ERROR(EIO);
1211 break;
1212 }
1213 xfs_buf_iorequest(bp);
1204 error = xfs_buf_iowait(bp); 1214 error = xfs_buf_iowait(bp);
1205 if (error) { 1215 if (error) {
1206 xfs_buf_ioerror_alert(bp, 1216 xfs_buf_ioerror_alert(bp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..afe7645e4b2b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -698,7 +698,11 @@ xfs_buf_read_uncached(
698 bp->b_flags |= XBF_READ; 698 bp->b_flags |= XBF_READ;
699 bp->b_ops = ops; 699 bp->b_ops = ops;
700 700
701 xfsbdstrat(target->bt_mount, bp); 701 if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
702 xfs_buf_relse(bp);
703 return NULL;
704 }
705 xfs_buf_iorequest(bp);
702 xfs_buf_iowait(bp); 706 xfs_buf_iowait(bp);
703 return bp; 707 return bp;
704} 708}
@@ -1089,7 +1093,7 @@ xfs_bioerror(
1089 * This is meant for userdata errors; metadata bufs come with 1093 * This is meant for userdata errors; metadata bufs come with
1090 * iodone functions attached, so that we can track down errors. 1094 * iodone functions attached, so that we can track down errors.
1091 */ 1095 */
1092STATIC int 1096int
1093xfs_bioerror_relse( 1097xfs_bioerror_relse(
1094 struct xfs_buf *bp) 1098 struct xfs_buf *bp)
1095{ 1099{
@@ -1152,7 +1156,7 @@ xfs_bwrite(
1152 ASSERT(xfs_buf_islocked(bp)); 1156 ASSERT(xfs_buf_islocked(bp));
1153 1157
1154 bp->b_flags |= XBF_WRITE; 1158 bp->b_flags |= XBF_WRITE;
1155 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); 1159 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
1156 1160
1157 xfs_bdstrat_cb(bp); 1161 xfs_bdstrat_cb(bp);
1158 1162
@@ -1164,25 +1168,6 @@ xfs_bwrite(
1164 return error; 1168 return error;
1165} 1169}
1166 1170
1167/*
1168 * Wrapper around bdstrat so that we can stop data from going to disk in case
1169 * we are shutting down the filesystem. Typically user data goes thru this
1170 * path; one of the exceptions is the superblock.
1171 */
1172void
1173xfsbdstrat(
1174 struct xfs_mount *mp,
1175 struct xfs_buf *bp)
1176{
1177 if (XFS_FORCED_SHUTDOWN(mp)) {
1178 trace_xfs_bdstrat_shut(bp, _RET_IP_);
1179 xfs_bioerror_relse(bp);
1180 return;
1181 }
1182
1183 xfs_buf_iorequest(bp);
1184}
1185
1186STATIC void 1171STATIC void
1187_xfs_buf_ioend( 1172_xfs_buf_ioend(
1188 xfs_buf_t *bp, 1173 xfs_buf_t *bp,
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg(
1516 struct xfs_buf *bp; 1501 struct xfs_buf *bp;
1517 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1502 bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1518 list_del_init(&bp->b_lru); 1503 list_del_init(&bp->b_lru);
1504 if (bp->b_flags & XBF_WRITE_FAIL) {
1505 xfs_alert(btp->bt_mount,
1506"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
1507"Please run xfs_repair to determine the extent of the problem.",
1508 (long long)bp->b_bn);
1509 }
1519 xfs_buf_rele(bp); 1510 xfs_buf_rele(bp);
1520 } 1511 }
1521 if (loop++ != 0) 1512 if (loop++ != 0)
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit(
1799 1790
1800 blk_start_plug(&plug); 1791 blk_start_plug(&plug);
1801 list_for_each_entry_safe(bp, n, io_list, b_list) { 1792 list_for_each_entry_safe(bp, n, io_list, b_list) {
1802 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); 1793 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
1803 bp->b_flags |= XBF_WRITE; 1794 bp->b_flags |= XBF_WRITE;
1804 1795
1805 if (!wait) { 1796 if (!wait) {
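
For reference, this is the wrapper the series removes, reconstructed verbatim from the deleted lines above; each caller now open-codes the XFS_FORCED_SHUTDOWN() check so it can pick its own error handling instead of the one-size-fits-all xfs_bioerror_relse():

    void
    xfsbdstrat(
        struct xfs_mount    *mp,
        struct xfs_buf      *bp)
    {
        if (XFS_FORCED_SHUTDOWN(mp)) {
            trace_xfs_bdstrat_shut(bp, _RET_IP_);
            xfs_bioerror_relse(bp);
            return;
        }

        xfs_buf_iorequest(bp);
    }
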
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..1cf21a4a9f22 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
45#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 45#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
46#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 46#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
47#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 47#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
48#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
48 49
49/* I/O hints for the BIO layer */ 50/* I/O hints for the BIO layer */
50#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 51#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
70 { XBF_ASYNC, "ASYNC" }, \ 71 { XBF_ASYNC, "ASYNC" }, \
71 { XBF_DONE, "DONE" }, \ 72 { XBF_DONE, "DONE" }, \
72 { XBF_STALE, "STALE" }, \ 73 { XBF_STALE, "STALE" }, \
74 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \
73 { XBF_SYNCIO, "SYNCIO" }, \ 75 { XBF_SYNCIO, "SYNCIO" }, \
74 { XBF_FUA, "FUA" }, \ 76 { XBF_FUA, "FUA" }, \
75 { XBF_FLUSH, "FLUSH" }, \ 77 { XBF_FLUSH, "FLUSH" }, \
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t;
80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 82 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
81 { _XBF_COMPOUND, "COMPOUND" } 83 { _XBF_COMPOUND, "COMPOUND" }
82 84
85
83/* 86/*
84 * Internal state flags. 87 * Internal state flags.
85 */ 88 */
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
269 272
270/* Buffer Read and Write Routines */ 273/* Buffer Read and Write Routines */
271extern int xfs_bwrite(struct xfs_buf *bp); 274extern int xfs_bwrite(struct xfs_buf *bp);
272
273extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
274
275extern void xfs_buf_ioend(xfs_buf_t *, int); 275extern void xfs_buf_ioend(xfs_buf_t *, int);
276extern void xfs_buf_ioerror(xfs_buf_t *, int); 276extern void xfs_buf_ioerror(xfs_buf_t *, int);
277extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); 277extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
282#define xfs_buf_zero(bp, off, len) \ 282#define xfs_buf_zero(bp, off, len) \
283 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 283 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
284 284
285extern int xfs_bioerror_relse(struct xfs_buf *);
286
285static inline int xfs_buf_geterror(xfs_buf_t *bp) 287static inline int xfs_buf_geterror(xfs_buf_t *bp)
286{ 288{
287 return bp ? bp->b_error : ENOMEM; 289 return bp ? bp->b_error : ENOMEM;
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void);
301 303
302#define XFS_BUF_ZEROFLAGS(bp) \ 304#define XFS_BUF_ZEROFLAGS(bp) \
303 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ 305 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
304 XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) 306 XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
307 XBF_WRITE_FAIL))
305 308
306void xfs_buf_stale(struct xfs_buf *bp); 309void xfs_buf_stale(struct xfs_buf *bp);
307#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) 310#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..2227b9b050bb 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -496,6 +496,14 @@ xfs_buf_item_unpin(
496 } 496 }
497} 497}
498 498
499/*
500 * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
501 * seconds so as to not spam logs too much on repeated detection of the same
502 * buffer being bad.
503 */
504
505DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
506
499STATIC uint 507STATIC uint
500xfs_buf_item_push( 508xfs_buf_item_push(
501 struct xfs_log_item *lip, 509 struct xfs_log_item *lip,
@@ -524,6 +532,14 @@ xfs_buf_item_push(
524 532
525 trace_xfs_buf_item_push(bip); 533 trace_xfs_buf_item_push(bip);
526 534
535 /* has a previous flush failed due to IO errors? */
536 if ((bp->b_flags & XBF_WRITE_FAIL) &&
537 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
538 xfs_warn(bp->b_target->bt_mount,
539"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
540 (long long)bp->b_bn);
541 }
542
527 if (!xfs_buf_delwri_queue(bp, buffer_list)) 543 if (!xfs_buf_delwri_queue(bp, buffer_list))
528 rval = XFS_ITEM_FLUSHING; 544 rval = XFS_ITEM_FLUSHING;
529 xfs_buf_unlock(bp); 545 xfs_buf_unlock(bp);
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks(
1096 1112
1097 xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ 1113 xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
1098 1114
1099 if (!XFS_BUF_ISSTALE(bp)) { 1115 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
1100 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; 1116 bp->b_flags |= XBF_WRITE | XBF_ASYNC |
1117 XBF_DONE | XBF_WRITE_FAIL;
1101 xfs_buf_iorequest(bp); 1118 xfs_buf_iorequest(bp);
1102 } else { 1119 } else {
1103 xfs_buf_relse(bp); 1120 xfs_buf_relse(bp);
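
XBF_WRITE_FAIL implements retry-once semantics for failed async metadata writes: the first failure sets the flag and resubmits immediately; a repeat failure releases the buffer and leaves the log item in the AIL, which keeps retrying via xfs_buf_item_push() under the rate-limited warning above. The resubmit condition from xfs_buf_iodone_callbacks(), condensed:

    if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
        /* first failure: mark the buffer and retry the write once */
        bp->b_flags |= XBF_WRITE | XBF_ASYNC |
                       XBF_DONE | XBF_WRITE_FAIL;
        xfs_buf_iorequest(bp);
    } else {
        /* repeat failure: give up here, the AIL retries later */
        xfs_buf_relse(bp);
    }
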
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4509d5..48c7d18f68c3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup(
2067 */ 2067 */
2068int /* error */ 2068int /* error */
2069xfs_dir2_node_removename( 2069xfs_dir2_node_removename(
2070 xfs_da_args_t *args) /* operation arguments */ 2070 struct xfs_da_args *args) /* operation arguments */
2071{ 2071{
2072 xfs_da_state_blk_t *blk; /* leaf block */ 2072 struct xfs_da_state_blk *blk; /* leaf block */
2073 int error; /* error return value */ 2073 int error; /* error return value */
2074 int rval; /* operation return value */ 2074 int rval; /* operation return value */
2075 xfs_da_state_t *state; /* btree cursor */ 2075 struct xfs_da_state *state; /* btree cursor */
2076 2076
2077 trace_xfs_dir2_node_removename(args); 2077 trace_xfs_dir2_node_removename(args);
2078 2078
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename(
2084 state->mp = args->dp->i_mount; 2084 state->mp = args->dp->i_mount;
2085 state->blocksize = state->mp->m_dirblksize; 2085 state->blocksize = state->mp->m_dirblksize;
2086 state->node_ents = state->mp->m_dir_node_ents; 2086 state->node_ents = state->mp->m_dir_node_ents;
2087 /* 2087
2088 * Look up the entry we're deleting, set up the cursor. 2088 /* Look up the entry we're deleting, set up the cursor. */
2089 */
2090 error = xfs_da3_node_lookup_int(state, &rval); 2089 error = xfs_da3_node_lookup_int(state, &rval);
2091 if (error) 2090 if (error)
2092 rval = error; 2091 goto out_free;
2093 /* 2092
2094 * Didn't find it, upper layer screwed up. 2093 /* Didn't find it, upper layer screwed up. */
2095 */
2096 if (rval != EEXIST) { 2094 if (rval != EEXIST) {
2097 xfs_da_state_free(state); 2095 error = rval;
2098 return rval; 2096 goto out_free;
2099 } 2097 }
2098
2100 blk = &state->path.blk[state->path.active - 1]; 2099 blk = &state->path.blk[state->path.active - 1];
2101 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); 2100 ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
2102 ASSERT(state->extravalid); 2101 ASSERT(state->extravalid);
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename(
2107 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, 2106 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
2108 &state->extrablk, &rval); 2107 &state->extrablk, &rval);
2109 if (error) 2108 if (error)
2110 return error; 2109 goto out_free;
2111 /* 2110 /*
2112 * Fix the hash values up the btree. 2111 * Fix the hash values up the btree.
2113 */ 2112 */
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename(
2122 */ 2121 */
2123 if (!error) 2122 if (!error)
2124 error = xfs_dir2_node_to_leaf(state); 2123 error = xfs_dir2_node_to_leaf(state);
2124out_free:
2125 xfs_da_state_free(state); 2125 xfs_da_state_free(state);
2126 return error; 2126 return error;
2127} 2127}
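
Besides the typedef cleanup, the restructuring fixes two error-path bugs: a failed lookup used to only overwrite rval, so an error numerically equal to EEXIST would let the function keep going with a bogus cursor, and a failure in xfs_dir2_leafn_remove() returned without freeing the da_state. Every exit now funnels through one label; the repaired flow in outline:

    error = xfs_da3_node_lookup_int(state, &rval);
    if (error)
        goto out_free;          /* was: rval = error; keep going */
    if (rval != EEXIST) {       /* the entry must already exist */
        error = rval;
        goto out_free;
    }
    /* remove the entry, fix up hashes, maybe shrink back to leaf form */
    out_free:
        xfs_da_state_free(state);
        return error;
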
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8367d6dc18c9..4f11ef011139 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -157,7 +157,7 @@ xfs_ioc_trim(
157 struct xfs_mount *mp, 157 struct xfs_mount *mp,
158 struct fstrim_range __user *urange) 158 struct fstrim_range __user *urange)
159{ 159{
160 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; 160 struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
161 unsigned int granularity = q->limits.discard_granularity; 161 unsigned int granularity = q->limits.discard_granularity;
162 struct fstrim_range range; 162 struct fstrim_range range;
163 xfs_daddr_t start, end, minlen; 163 xfs_daddr_t start, end, minlen;
@@ -180,7 +180,8 @@ xfs_ioc_trim(
180 * matter as trimming blocks is an advisory interface. 180 * matter as trimming blocks is an advisory interface.
181 */ 181 */
182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) 183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
184 range.len < mp->m_sb.sb_blocksize)
184 return -XFS_ERROR(EINVAL); 185 return -XFS_ERROR(EINVAL);
185 186
186 start = BTOBB(range.start); 187 start = BTOBB(range.start);
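
FITRIM ranges shorter than a filesystem block can never cover a whole block, so they are now rejected up front rather than silently trimming nothing. A userspace-side worked example, assuming a filesystem with 4096-byte blocks (FITRIM and struct fstrim_range come from linux/fs.h):

    struct fstrim_range range = {
        .start  = 0,
        .len    = 512,    /* < sb_blocksize (4096) */
        .minlen = 0,
    };

    /* previously returned 0 having done nothing; now fails */
    if (ioctl(fd, FITRIM, &range) < 0)
        perror("FITRIM");    /* EINVAL */
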
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a6e54b3319bd..02fb943cbf22 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -220,6 +220,8 @@ xfs_growfs_data_private(
220 */ 220 */
221 nfree = 0; 221 nfree = 0;
222 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { 222 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
223 __be32 *agfl_bno;
224
223 /* 225 /*
224 * AG freespace header block 226 * AG freespace header block
225 */ 227 */
@@ -279,8 +281,10 @@ xfs_growfs_data_private(
279 agfl->agfl_seqno = cpu_to_be32(agno); 281 agfl->agfl_seqno = cpu_to_be32(agno);
280 uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); 282 uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
281 } 283 }
284
285 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
282 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) 286 for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
283 agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); 287 agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
284 288
285 error = xfs_bwrite(bp); 289 error = xfs_bwrite(bp);
286 xfs_buf_relse(bp); 290 xfs_buf_relse(bp);
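
Writing through XFS_BUF_TO_AGFL_BNO() instead of agfl->agfl_bno matters on V4 (non-CRC) filesystems: struct xfs_agfl carries the V5 header fields, so indexing the struct member would shift every free-list slot by the header size and run off the end of the buffer. The accessor returns the correct array start for either format; usage as in the hunk:

    agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);    /* V4: block start,
                                                  V5: past the header */
    for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
        agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
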
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 4d613401a5e0..33ad9a77791f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -442,7 +442,8 @@ xfs_attrlist_by_handle(
442 return -XFS_ERROR(EPERM); 442 return -XFS_ERROR(EPERM);
443 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) 443 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
444 return -XFS_ERROR(EFAULT); 444 return -XFS_ERROR(EFAULT);
445 if (al_hreq.buflen > XATTR_LIST_MAX) 445 if (al_hreq.buflen < sizeof(struct attrlist) ||
446 al_hreq.buflen > XATTR_LIST_MAX)
446 return -XFS_ERROR(EINVAL); 447 return -XFS_ERROR(EINVAL);
447 448
448 /* 449 /*
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8fb1231db81..a7992f8de9d3 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,8 @@ xfs_compat_attrlist_by_handle(
356 if (copy_from_user(&al_hreq, arg, 356 if (copy_from_user(&al_hreq, arg,
357 sizeof(compat_xfs_fsop_attrlist_handlereq_t))) 357 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
358 return -XFS_ERROR(EFAULT); 358 return -XFS_ERROR(EFAULT);
359 if (al_hreq.buflen > XATTR_LIST_MAX) 359 if (al_hreq.buflen < sizeof(struct attrlist) ||
360 al_hreq.buflen > XATTR_LIST_MAX)
360 return -XFS_ERROR(EINVAL); 361 return -XFS_ERROR(EINVAL);
361 362
362 /* 363 /*
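
Both the native and compat attrlist handlers gain the same lower bound: the kernel writes attrlist bookkeeping into the head of the user buffer, so anything smaller than struct attrlist cannot be filled safely. The shared guard, for reference:

    if (al_hreq.buflen < sizeof(struct attrlist) ||
        al_hreq.buflen > XATTR_LIST_MAX)
        return -XFS_ERROR(EINVAL);    /* too small to hold even the
                                         attrlist header */
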
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..104455b8046c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -618,7 +618,8 @@ xfs_setattr_nonsize(
618 } 618 }
619 if (!gid_eq(igid, gid)) { 619 if (!gid_eq(igid, gid)) {
620 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { 620 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
621 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 621 ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
622 !XFS_IS_PQUOTA_ON(mp));
622 ASSERT(mask & ATTR_GID); 623 ASSERT(mask & ATTR_GID);
623 ASSERT(gdqp); 624 ASSERT(gdqp);
624 olddquot2 = xfs_qm_vop_chown(tp, ip, 625 olddquot2 = xfs_qm_vop_chown(tp, ip,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..eae16920655b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,7 +193,10 @@ xlog_bread_noalign(
193 bp->b_io_length = nbblks; 193 bp->b_io_length = nbblks;
194 bp->b_error = 0; 194 bp->b_error = 0;
195 195
196 xfsbdstrat(log->l_mp, bp); 196 if (XFS_FORCED_SHUTDOWN(log->l_mp))
197 return XFS_ERROR(EIO);
198
199 xfs_buf_iorequest(bp);
197 error = xfs_buf_iowait(bp); 200 error = xfs_buf_iowait(bp);
198 if (error) 201 if (error)
199 xfs_buf_ioerror_alert(bp, __func__); 202 xfs_buf_ioerror_alert(bp, __func__);
@@ -4397,7 +4400,13 @@ xlog_do_recover(
4397 XFS_BUF_READ(bp); 4400 XFS_BUF_READ(bp);
4398 XFS_BUF_UNASYNC(bp); 4401 XFS_BUF_UNASYNC(bp);
4399 bp->b_ops = &xfs_sb_buf_ops; 4402 bp->b_ops = &xfs_sb_buf_ops;
4400 xfsbdstrat(log->l_mp, bp); 4403
4404 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
4405 xfs_buf_relse(bp);
4406 return XFS_ERROR(EIO);
4407 }
4408
4409 xfs_buf_iorequest(bp);
4401 error = xfs_buf_iowait(bp); 4410 error = xfs_buf_iowait(bp);
4402 if (error) { 4411 if (error) {
4403 xfs_buf_ioerror_alert(bp, __func__); 4412 xfs_buf_ioerror_alert(bp, __func__);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..dd88f0e27bd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,8 +134,6 @@ xfs_qm_dqpurge(
134{ 134{
135 struct xfs_mount *mp = dqp->q_mount; 135 struct xfs_mount *mp = dqp->q_mount;
136 struct xfs_quotainfo *qi = mp->m_quotainfo; 136 struct xfs_quotainfo *qi = mp->m_quotainfo;
137 struct xfs_dquot *gdqp = NULL;
138 struct xfs_dquot *pdqp = NULL;
139 137
140 xfs_dqlock(dqp); 138 xfs_dqlock(dqp);
141 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -143,21 +141,6 @@ xfs_qm_dqpurge(
143 return EAGAIN; 141 return EAGAIN;
144 } 142 }
145 143
146 /*
147 * If this quota has a hint attached, prepare for releasing it now.
148 */
149 gdqp = dqp->q_gdquot;
150 if (gdqp) {
151 xfs_dqlock(gdqp);
152 dqp->q_gdquot = NULL;
153 }
154
155 pdqp = dqp->q_pdquot;
156 if (pdqp) {
157 xfs_dqlock(pdqp);
158 dqp->q_pdquot = NULL;
159 }
160
161 dqp->dq_flags |= XFS_DQ_FREEING; 144 dqp->dq_flags |= XFS_DQ_FREEING;
162 145
163 xfs_dqflock(dqp); 146 xfs_dqflock(dqp);
@@ -206,11 +189,47 @@ xfs_qm_dqpurge(
206 XFS_STATS_DEC(xs_qm_dquot_unused); 189 XFS_STATS_DEC(xs_qm_dquot_unused);
207 190
208 xfs_qm_dqdestroy(dqp); 191 xfs_qm_dqdestroy(dqp);
192 return 0;
193}
194
195/*
196 * Release the group or project dquot pointers the user dquots maybe carrying
197 * around as a hint, and proceed to purge the user dquot cache if requested.
198*/
199STATIC int
200xfs_qm_dqpurge_hints(
201 struct xfs_dquot *dqp,
202 void *data)
203{
204 struct xfs_dquot *gdqp = NULL;
205 struct xfs_dquot *pdqp = NULL;
206 uint flags = *((uint *)data);
207
208 xfs_dqlock(dqp);
209 if (dqp->dq_flags & XFS_DQ_FREEING) {
210 xfs_dqunlock(dqp);
211 return EAGAIN;
212 }
213
214 /* If this quota has a hint attached, prepare for releasing it now */
215 gdqp = dqp->q_gdquot;
216 if (gdqp)
217 dqp->q_gdquot = NULL;
218
219 pdqp = dqp->q_pdquot;
220 if (pdqp)
221 dqp->q_pdquot = NULL;
222
223 xfs_dqunlock(dqp);
209 224
210 if (gdqp) 225 if (gdqp)
211 xfs_qm_dqput(gdqp); 226 xfs_qm_dqrele(gdqp);
212 if (pdqp) 227 if (pdqp)
213 xfs_qm_dqput(pdqp); 228 xfs_qm_dqrele(pdqp);
229
230 if (flags & XFS_QMOPT_UQUOTA)
231 return xfs_qm_dqpurge(dqp, NULL);
232
214 return 0; 233 return 0;
215} 234}
216 235
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all(
222 struct xfs_mount *mp, 241 struct xfs_mount *mp,
223 uint flags) 242 uint flags)
224{ 243{
225 if (flags & XFS_QMOPT_UQUOTA) 244 /*
226 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); 245 * We have to release the group/project dquot hints held by the user
246 * dquots first, if they are there; otherwise we would run into an
247 * infinite loop while walking the radix tree to purge the other dquot
248 * types, since their refcount stays non-zero as long as a user dquot
249 * still refers to them as a hint.
250 *
251 * Calling the special xfs_qm_dqpurge_hints() will end up going through
252 * the general xfs_qm_dqpurge() for the user dquot cache if requested.
253 */
254 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
255
227 if (flags & XFS_QMOPT_GQUOTA) 256 if (flags & XFS_QMOPT_GQUOTA)
228 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); 257 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
229 if (flags & XFS_QMOPT_PQUOTA) 258 if (flags & XFS_QMOPT_PQUOTA)
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach(
2082 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2111 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2083 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2112 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2084 2113
2085 if (udqp) { 2114 if (udqp && XFS_IS_UQUOTA_ON(mp)) {
2086 ASSERT(ip->i_udquot == NULL); 2115 ASSERT(ip->i_udquot == NULL);
2087 ASSERT(XFS_IS_UQUOTA_ON(mp));
2088 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2116 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2089 2117
2090 ip->i_udquot = xfs_qm_dqhold(udqp); 2118 ip->i_udquot = xfs_qm_dqhold(udqp);
2091 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2119 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2092 } 2120 }
2093 if (gdqp) { 2121 if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
2094 ASSERT(ip->i_gdquot == NULL); 2122 ASSERT(ip->i_gdquot == NULL);
2095 ASSERT(XFS_IS_GQUOTA_ON(mp));
2096 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); 2123 ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
2097 ip->i_gdquot = xfs_qm_dqhold(gdqp); 2124 ip->i_gdquot = xfs_qm_dqhold(gdqp);
2098 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2125 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2099 } 2126 }
2100 if (pdqp) { 2127 if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
2101 ASSERT(ip->i_pdquot == NULL); 2128 ASSERT(ip->i_pdquot == NULL);
2102 ASSERT(XFS_IS_PQUOTA_ON(mp));
2103 ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); 2129 ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
2104 2130
2105 ip->i_pdquot = xfs_qm_dqhold(pdqp); 2131 ip->i_pdquot = xfs_qm_dqhold(pdqp);
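
The purge reordering breaks a refcount deadlock: every user dquot may pin a group and/or project dquot through its hint pointers, so walking those caches first finds non-zero refcounts forever, hence the EAGAIN-driven infinite loop at unmount. Releasing the hints first, then purging group and project, resolves it; the resulting order in xfs_qm_dqpurge_all():

    /* hints first (this walk also purges user dquots when requested) */
    xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);

    if (flags & XFS_QMOPT_GQUOTA)
        xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
    if (flags & XFS_QMOPT_PQUOTA)
        xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL);
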
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..647b6f1d8923 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map(
314 ASSERT(bp->b_iodone == NULL); 314 ASSERT(bp->b_iodone == NULL);
315 XFS_BUF_READ(bp); 315 XFS_BUF_READ(bp);
316 bp->b_ops = ops; 316 bp->b_ops = ops;
317 xfsbdstrat(tp->t_mountp, bp); 317
318 /*
319 * XXX(hch): clean up the error handling here to be less
320 * of a mess..
321 */
322 if (XFS_FORCED_SHUTDOWN(mp)) {
323 trace_xfs_bdstrat_shut(bp, _RET_IP_);
324 xfs_bioerror_relse(bp);
325 } else {
326 xfs_buf_iorequest(bp);
327 }
328
318 error = xfs_buf_iowait(bp); 329 error = xfs_buf_iowait(bp);
319 if (error) { 330 if (error) {
320 xfs_buf_ioerror_alert(bp, __func__); 331 xfs_buf_ioerror_alert(bp, __func__);