aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c11
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/compression.c27
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c100
-rw-r--r--fs/btrfs/extent_io.c54
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/free-space-cache.c162
-rw-r--r--fs/btrfs/inode.c38
-rw-r--r--fs/btrfs/ioctl.c29
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/print-tree.c1
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/super.c9
-rw-r--r--fs/btrfs/transaction.c5
-rw-r--r--fs/btrfs/tree-log.c35
-rw-r--r--fs/btrfs/volumes.c21
-rw-r--r--fs/ceph/caps.c43
-rw-r--r--fs/ceph/inode.c10
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c3
-rw-r--r--fs/cifs/Kconfig1
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cifs_dfs_ref.c10
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c38
-rw-r--r--fs/cifs/cifsencrypt.h33
-rw-r--r--fs/cifs/cifsfs.c15
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/cifsglob.h37
-rw-r--r--fs/cifs/cifsproto.h11
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/connect.c70
-rw-r--r--fs/cifs/file.c211
-rw-r--r--fs/cifs/link.c59
-rw-r--r--fs/cifs/md4.c205
-rw-r--r--fs/cifs/md5.c366
-rw-r--r--fs/cifs/md5.h38
-rw-r--r--fs/cifs/misc.c116
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/smbdes.c1
-rw-r--r--fs/cifs/smbencrypt.c92
-rw-r--r--fs/cifs/transport.c69
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/eventpoll.c16
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hfsplus/part_tbl.c4
-rw-r--r--fs/hfsplus/super.c106
-rw-r--r--fs/hfsplus/wrapper.c4
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/lockd/host.c9
-rw-r--r--fs/namei.c151
-rw-r--r--fs/nfs/callback.c109
-rw-r--r--fs/nfs/callback.h4
-rw-r--r--fs/nfs/callback_proc.c12
-rw-r--r--fs/nfs/callback_xdr.c5
-rw-r--r--fs/nfs/client.c15
-rw-r--r--fs/nfs/delegation.c6
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c26
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4filelayoutdev.c9
-rw-r--r--fs/nfs/nfs4proc.c30
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/nfs4xdr.c9
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfs_common/nfsacl.c54
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4state.c186
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nilfs2/super.c3
-rw-r--r--fs/ntfs/mft.c11
-rw-r--r--fs/open.c2
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/posix_acl.c17
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/consoles.c4
-rw-r--r--fs/squashfs/block.c8
-rw-r--r--fs/squashfs/xz_wrapper.c6
-rw-r--r--fs/squashfs/zlib_wrapper.c6
-rw-r--r--fs/super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c20
-rw-r--r--fs/xfs/quota/xfs_qm.c46
-rw-r--r--fs/xfs/xfs_alloc.h16
-rw-r--r--fs/xfs/xfs_bmap.c61
-rw-r--r--fs/xfs/xfs_buf_item.c12
-rw-r--r--fs/xfs/xfs_extfree_item.c3
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c15
-rw-r--r--fs/xfs/xfs_trans.c41
115 files changed, 1965 insertions, 1581 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..4fb8a3431531 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1215,12 +1215,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1215 1215
1216 res = __blkdev_get(bdev, mode, 0); 1216 res = __blkdev_get(bdev, mode, 0);
1217 1217
1218 /* __blkdev_get() may alter read only status, check it afterwards */
1219 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1220 __blkdev_put(bdev, mode, 0);
1221 res = -EACCES;
1222 }
1223
1224 if (whole) { 1218 if (whole) {
1225 /* finish claiming */ 1219 /* finish claiming */
1226 mutex_lock(&bdev->bd_mutex); 1220 mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1292,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1298 if (err) 1292 if (err)
1299 return ERR_PTR(err); 1293 return ERR_PTR(err);
1300 1294
1295 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1296 blkdev_put(bdev, mode);
1297 return ERR_PTR(-EACCES);
1298 }
1299
1301 return bdev; 1300 return bdev;
1302} 1301}
1303EXPORT_SYMBOL(blkdev_get_by_path); 1302EXPORT_SYMBOL(blkdev_get_by_path);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2a2606..9c949348510b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
37 char *value = NULL; 37 char *value = NULL;
38 struct posix_acl *acl; 38 struct posix_acl *acl;
39 39
40 if (!IS_POSIXACL(inode))
41 return NULL;
42
40 acl = get_cached_acl(inode, type); 43 acl = get_cached_acl(inode, type);
41 if (acl != ACL_NOT_CACHED) 44 if (acl != ACL_NOT_CACHED)
42 return acl; 45 return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
84 struct posix_acl *acl; 87 struct posix_acl *acl;
85 int ret = 0; 88 int ret = 0;
86 89
90 if (!IS_POSIXACL(dentry->d_inode))
91 return -EOPNOTSUPP;
92
87 acl = btrfs_get_acl(dentry->d_inode, type); 93 acl = btrfs_get_acl(dentry->d_inode, type);
88 94
89 if (IS_ERR(acl)) 95 if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287fbf2e..4d2110eafe29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
562 u64 em_len; 562 u64 em_len;
563 u64 em_start; 563 u64 em_start;
564 struct extent_map *em; 564 struct extent_map *em;
565 int ret; 565 int ret = -ENOMEM;
566 u32 *sums; 566 u32 *sums;
567 567
568 tree = &BTRFS_I(inode)->io_tree; 568 tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
577 577
578 compressed_len = em->block_len; 578 compressed_len = em->block_len;
579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
580 if (!cb)
581 goto out;
582
580 atomic_set(&cb->pending_bios, 0); 583 atomic_set(&cb->pending_bios, 0);
581 cb->errors = 0; 584 cb->errors = 0;
582 cb->inode = inode; 585 cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
597 600
598 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / 601 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
599 PAGE_CACHE_SIZE; 602 PAGE_CACHE_SIZE;
600 cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, 603 cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
601 GFP_NOFS); 604 GFP_NOFS);
605 if (!cb->compressed_pages)
606 goto fail1;
607
602 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 608 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
603 609
604 for (page_index = 0; page_index < nr_pages; page_index++) { 610 for (page_index = 0; page_index < nr_pages; page_index++) {
605 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | 611 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
606 __GFP_HIGHMEM); 612 __GFP_HIGHMEM);
613 if (!cb->compressed_pages[page_index])
614 goto fail2;
607 } 615 }
608 cb->nr_pages = nr_pages; 616 cb->nr_pages = nr_pages;
609 617
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
614 cb->len = uncompressed_len; 622 cb->len = uncompressed_len;
615 623
616 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); 624 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
625 if (!comp_bio)
626 goto fail2;
617 comp_bio->bi_private = cb; 627 comp_bio->bi_private = cb;
618 comp_bio->bi_end_io = end_compressed_bio_read; 628 comp_bio->bi_end_io = end_compressed_bio_read;
619 atomic_inc(&cb->pending_bios); 629 atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
681 691
682 bio_put(comp_bio); 692 bio_put(comp_bio);
683 return 0; 693 return 0;
694
695fail2:
696 for (page_index = 0; page_index < nr_pages; page_index++)
697 free_page((unsigned long)cb->compressed_pages[page_index]);
698
699 kfree(cb->compressed_pages);
700fail1:
701 kfree(cb);
702out:
703 free_extent_map(em);
704 return ret;
684} 705}
685 706
686static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; 707static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
900 return ret; 921 return ret;
901} 922}
902 923
903void __exit btrfs_exit_compress(void) 924void btrfs_exit_compress(void)
904{ 925{
905 free_workspaces(); 926 free_workspaces();
906} 927}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 359
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 360 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 361
362 if (page->private == EXTENT_PAGE_PRIVATE) 362 if (page->private == EXTENT_PAGE_PRIVATE) {
363 WARN_ON(1);
363 goto out; 364 goto out;
364 if (!page->private) 365 }
366 if (!page->private) {
367 WARN_ON(1);
365 goto out; 368 goto out;
369 }
366 len = page->private >> 2; 370 len = page->private >> 2;
367 WARN_ON(len == 0); 371 WARN_ON(len == 0);
368 372
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
1550 spin_unlock(&root->fs_info->new_trans_lock); 1554 spin_unlock(&root->fs_info->new_trans_lock);
1551 1555
1552 trans = btrfs_join_transaction(root, 1); 1556 trans = btrfs_join_transaction(root, 1);
1557 BUG_ON(IS_ERR(trans));
1553 if (transid == trans->transid) { 1558 if (transid == trans->transid) {
1554 ret = btrfs_commit_transaction(trans, root); 1559 ret = btrfs_commit_transaction(trans, root);
1555 BUG_ON(ret); 1560 BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2453 up_write(&root->fs_info->cleanup_work_sem); 2458 up_write(&root->fs_info->cleanup_work_sem);
2454 2459
2455 trans = btrfs_join_transaction(root, 1); 2460 trans = btrfs_join_transaction(root, 1);
2461 if (IS_ERR(trans))
2462 return PTR_ERR(trans);
2456 ret = btrfs_commit_transaction(trans, root); 2463 ret = btrfs_commit_transaction(trans, root);
2457 BUG_ON(ret); 2464 BUG_ON(ret);
2458 /* run commit again to drop the original snapshot */ 2465 /* run commit again to drop the original snapshot */
2459 trans = btrfs_join_transaction(root, 1); 2466 trans = btrfs_join_transaction(root, 1);
2467 if (IS_ERR(trans))
2468 return PTR_ERR(trans);
2460 btrfs_commit_transaction(trans, root); 2469 btrfs_commit_transaction(trans, root);
2461 ret = btrfs_write_and_wait_transaction(NULL, root); 2470 ret = btrfs_write_and_wait_transaction(NULL, root);
2462 BUG_ON(ret); 2471 BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
2554 kfree(fs_info->chunk_root); 2563 kfree(fs_info->chunk_root);
2555 kfree(fs_info->dev_root); 2564 kfree(fs_info->dev_root);
2556 kfree(fs_info->csum_root); 2565 kfree(fs_info->csum_root);
2566 kfree(fs_info);
2567
2557 return 0; 2568 return 0;
2558} 2569}
2559 2570
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963b07e5..ff27d7a477b2 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
171 int ret; 171 int ret;
172 172
173 path = btrfs_alloc_path(); 173 path = btrfs_alloc_path();
174 if (!path)
175 return ERR_PTR(-ENOMEM);
174 176
175 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 177 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
176 key.objectid = root->root_key.objectid; 178 key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b55269340cec..f3c96fc01439 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
320 if (!path) 320 if (!path)
321 return -ENOMEM; 321 return -ENOMEM;
322 322
323 exclude_super_stripes(extent_root, block_group);
324 spin_lock(&block_group->space_info->lock);
325 block_group->space_info->bytes_readonly += block_group->bytes_super;
326 spin_unlock(&block_group->space_info->lock);
327
328 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 323 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
329 324
330 /* 325 /*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
467 cache->cached = BTRFS_CACHE_NO; 462 cache->cached = BTRFS_CACHE_NO;
468 } 463 }
469 spin_unlock(&cache->lock); 464 spin_unlock(&cache->lock);
470 if (ret == 1) 465 if (ret == 1) {
466 free_excluded_extents(fs_info->extent_root, cache);
471 return 0; 467 return 0;
468 }
472 } 469 }
473 470
474 if (load_cache_only) 471 if (load_cache_only)
@@ -3344,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3344 u64 reserved; 3341 u64 reserved;
3345 u64 max_reclaim; 3342 u64 max_reclaim;
3346 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left;
3347 int pause = 1; 3345 int pause = 1;
3348 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0;
3349 3348
3350 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3351 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
@@ -3358,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3358 3357
3359 max_reclaim = min(reserved, to_reclaim); 3358 max_reclaim = min(reserved, to_reclaim);
3360 3359
3361 while (1) { 3360 while (loops < 1024) {
3362 /* have the flusher threads jump in and do some IO */ 3361 /* have the flusher threads jump in and do some IO */
3363 smp_mb(); 3362 smp_mb();
3364 nr_pages = min_t(unsigned long, nr_pages, 3363 nr_pages = min_t(unsigned long, nr_pages,
@@ -3366,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3367 3366
3368 spin_lock(&space_info->lock); 3367 spin_lock(&space_info->lock);
3369 if (reserved > space_info->bytes_reserved) 3368 if (reserved > space_info->bytes_reserved) {
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3371 reserved = space_info->bytes_reserved; 3374 reserved = space_info->bytes_reserved;
3372 spin_unlock(&space_info->lock); 3375 spin_unlock(&space_info->lock);
3373 3376
@@ -3378,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3378 return -EAGAIN; 3381 return -EAGAIN;
3379 3382
3380 __set_current_state(TASK_INTERRUPTIBLE); 3383 __set_current_state(TASK_INTERRUPTIBLE);
3381 schedule_timeout(pause); 3384 time_left = schedule_timeout(pause);
3385
3386 /* We were interrupted, exit */
3387 if (time_left)
3388 break;
3389
3382 pause <<= 1; 3390 pause <<= 1;
3383 if (pause > HZ / 10) 3391 if (pause > HZ / 10)
3384 pause = HZ / 10; 3392 pause = HZ / 10;
@@ -3588,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3588 3596
3589 if (num_bytes > 0) { 3597 if (num_bytes > 0) {
3590 if (dest) { 3598 if (dest) {
3591 block_rsv_add_bytes(dest, num_bytes, 0); 3599 spin_lock(&dest->lock);
3592 } else { 3600 if (!dest->full) {
3601 u64 bytes_to_add;
3602
3603 bytes_to_add = dest->size - dest->reserved;
3604 bytes_to_add = min(num_bytes, bytes_to_add);
3605 dest->reserved += bytes_to_add;
3606 if (dest->reserved >= dest->size)
3607 dest->full = 1;
3608 num_bytes -= bytes_to_add;
3609 }
3610 spin_unlock(&dest->lock);
3611 }
3612 if (num_bytes) {
3593 spin_lock(&space_info->lock); 3613 spin_lock(&space_info->lock);
3594 space_info->bytes_reserved -= num_bytes; 3614 space_info->bytes_reserved -= num_bytes;
3595 spin_unlock(&space_info->lock); 3615 spin_unlock(&space_info->lock);
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4012 4032
4013 num_bytes = ALIGN(num_bytes, root->sectorsize); 4033 num_bytes = ALIGN(num_bytes, root->sectorsize);
4014 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4034 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4035 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4015 4036
4016 spin_lock(&BTRFS_I(inode)->accounting_lock); 4037 spin_lock(&BTRFS_I(inode)->accounting_lock);
4017 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4038 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5633 struct btrfs_root *root, u32 blocksize) 5654 struct btrfs_root *root, u32 blocksize)
5634{ 5655{
5635 struct btrfs_block_rsv *block_rsv; 5656 struct btrfs_block_rsv *block_rsv;
5657 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5636 int ret; 5658 int ret;
5637 5659
5638 block_rsv = get_block_rsv(trans, root); 5660 block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5640 if (block_rsv->size == 0) { 5662 if (block_rsv->size == 0) {
5641 ret = reserve_metadata_bytes(trans, root, block_rsv, 5663 ret = reserve_metadata_bytes(trans, root, block_rsv,
5642 blocksize, 0); 5664 blocksize, 0);
5643 if (ret) 5665 /*
5666 * If we couldn't reserve metadata bytes try and use some from
5667 * the global reserve.
5668 */
5669 if (ret && block_rsv != global_rsv) {
5670 ret = block_rsv_use_bytes(global_rsv, blocksize);
5671 if (!ret)
5672 return global_rsv;
5673 return ERR_PTR(ret);
5674 } else if (ret) {
5644 return ERR_PTR(ret); 5675 return ERR_PTR(ret);
5676 }
5645 return block_rsv; 5677 return block_rsv;
5646 } 5678 }
5647 5679
5648 ret = block_rsv_use_bytes(block_rsv, blocksize); 5680 ret = block_rsv_use_bytes(block_rsv, blocksize);
5649 if (!ret) 5681 if (!ret)
5650 return block_rsv; 5682 return block_rsv;
5683 if (ret) {
5684 WARN_ON(1);
5685 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5686 0);
5687 if (!ret) {
5688 spin_lock(&block_rsv->lock);
5689 block_rsv->size += blocksize;
5690 spin_unlock(&block_rsv->lock);
5691 return block_rsv;
5692 } else if (ret && block_rsv != global_rsv) {
5693 ret = block_rsv_use_bytes(global_rsv, blocksize);
5694 if (!ret)
5695 return global_rsv;
5696 }
5697 }
5651 5698
5652 return ERR_PTR(-ENOSPC); 5699 return ERR_PTR(-ENOSPC);
5653} 5700}
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6221 BUG_ON(!wc); 6268 BUG_ON(!wc);
6222 6269
6223 trans = btrfs_start_transaction(tree_root, 0); 6270 trans = btrfs_start_transaction(tree_root, 0);
6271 BUG_ON(IS_ERR(trans));
6272
6224 if (block_rsv) 6273 if (block_rsv)
6225 trans->block_rsv = block_rsv; 6274 trans->block_rsv = block_rsv;
6226 6275
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6318 6367
6319 btrfs_end_transaction_throttle(trans, tree_root); 6368 btrfs_end_transaction_throttle(trans, tree_root);
6320 trans = btrfs_start_transaction(tree_root, 0); 6369 trans = btrfs_start_transaction(tree_root, 0);
6370 BUG_ON(IS_ERR(trans));
6321 if (block_rsv) 6371 if (block_rsv)
6322 trans->block_rsv = block_rsv; 6372 trans->block_rsv = block_rsv;
6323 } 6373 }
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6446 int ret = 0; 6496 int ret = 0;
6447 6497
6448 ra = kzalloc(sizeof(*ra), GFP_NOFS); 6498 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6499 if (!ra)
6500 return -ENOMEM;
6449 6501
6450 mutex_lock(&inode->i_mutex); 6502 mutex_lock(&inode->i_mutex);
6451 first_index = start >> PAGE_CACHE_SHIFT; 6503 first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
6531 u64 end = start + extent_key->offset - 1; 6583 u64 end = start + extent_key->offset - 1;
6532 6584
6533 em = alloc_extent_map(GFP_NOFS); 6585 em = alloc_extent_map(GFP_NOFS);
6534 BUG_ON(!em || IS_ERR(em)); 6586 BUG_ON(!em);
6535 6587
6536 em->start = start; 6588 em->start = start;
6537 em->len = extent_key->offset; 6589 em->len = extent_key->offset;
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7477 BUG_ON(reloc_root->commit_root != NULL); 7529 BUG_ON(reloc_root->commit_root != NULL);
7478 while (1) { 7530 while (1) {
7479 trans = btrfs_join_transaction(root, 1); 7531 trans = btrfs_join_transaction(root, 1);
7480 BUG_ON(!trans); 7532 BUG_ON(IS_ERR(trans));
7481 7533
7482 mutex_lock(&root->fs_info->drop_mutex); 7534 mutex_lock(&root->fs_info->drop_mutex);
7483 ret = btrfs_drop_snapshot(trans, reloc_root); 7535 ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7535 7587
7536 if (found) { 7588 if (found) {
7537 trans = btrfs_start_transaction(root, 1); 7589 trans = btrfs_start_transaction(root, 1);
7538 BUG_ON(!trans); 7590 BUG_ON(IS_ERR(trans));
7539 ret = btrfs_commit_transaction(trans, root); 7591 ret = btrfs_commit_transaction(trans, root);
7540 BUG_ON(ret); 7592 BUG_ON(ret);
7541 } 7593 }
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7779 7831
7780 7832
7781 trans = btrfs_start_transaction(extent_root, 1); 7833 trans = btrfs_start_transaction(extent_root, 1);
7782 BUG_ON(!trans); 7834 BUG_ON(IS_ERR(trans));
7783 7835
7784 if (extent_key->objectid == 0) { 7836 if (extent_key->objectid == 0) {
7785 ret = del_extent_zero(trans, extent_root, path, extent_key); 7837 ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8270,6 +8322,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8270 if (block_group->cached == BTRFS_CACHE_STARTED) 8322 if (block_group->cached == BTRFS_CACHE_STARTED)
8271 wait_block_group_cache_done(block_group); 8323 wait_block_group_cache_done(block_group);
8272 8324
8325 /*
8326 * We haven't cached this block group, which means we could
8327 * possibly have excluded extents on this block group.
8328 */
8329 if (block_group->cached == BTRFS_CACHE_NO)
8330 free_excluded_extents(info->extent_root, block_group);
8331
8273 btrfs_remove_free_space_cache(block_group); 8332 btrfs_remove_free_space_cache(block_group);
8274 btrfs_put_block_group(block_group); 8333 btrfs_put_block_group(block_group);
8275 8334
@@ -8385,6 +8444,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8385 cache->sectorsize = root->sectorsize; 8444 cache->sectorsize = root->sectorsize;
8386 8445
8387 /* 8446 /*
8447 * We need to exclude the super stripes now so that the space
8448 * info has super bytes accounted for, otherwise we'll think
8449 * we have more space than we actually do.
8450 */
8451 exclude_super_stripes(root, cache);
8452
8453 /*
8388 * check for two cases, either we are full, and therefore 8454 * check for two cases, either we are full, and therefore
8389 * don't need to bother with the caching work since we won't 8455 * don't need to bother with the caching work since we won't
8390 * find any space, or we are empty, and we can just add all 8456 * find any space, or we are empty, and we can just add all
@@ -8392,12 +8458,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8392 * time, particularly in the full case. 8458 * time, particularly in the full case.
8393 */ 8459 */
8394 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 8460 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8395 exclude_super_stripes(root, cache);
8396 cache->last_byte_to_unpin = (u64)-1; 8461 cache->last_byte_to_unpin = (u64)-1;
8397 cache->cached = BTRFS_CACHE_FINISHED; 8462 cache->cached = BTRFS_CACHE_FINISHED;
8398 free_excluded_extents(root, cache); 8463 free_excluded_extents(root, cache);
8399 } else if (btrfs_block_group_used(&cache->item) == 0) { 8464 } else if (btrfs_block_group_used(&cache->item) == 0) {
8400 exclude_super_stripes(root, cache);
8401 cache->last_byte_to_unpin = (u64)-1; 8465 cache->last_byte_to_unpin = (u64)-1;
8402 cache->cached = BTRFS_CACHE_FINISHED; 8466 cache->cached = BTRFS_CACHE_FINISHED;
8403 add_new_free_space(cache, root->fs_info, 8467 add_new_free_space(cache, root->fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..92ac5192c518 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1865,7 +1865,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1865 bio_get(bio); 1865 bio_get(bio);
1866 1866
1867 if (tree->ops && tree->ops->submit_bio_hook) 1867 if (tree->ops && tree->ops->submit_bio_hook)
1868 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1868 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1869 mirror_num, bio_flags, start); 1869 mirror_num, bio_flags, start);
1870 else 1870 else
1871 submit_bio(rw, bio); 1871 submit_bio(rw, bio);
@@ -1920,6 +1920,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1920 nr = bio_get_nr_vecs(bdev); 1920 nr = bio_get_nr_vecs(bdev);
1921 1921
1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1923 if (!bio)
1924 return -ENOMEM;
1923 1925
1924 bio_add_page(bio, page, page_size, offset); 1926 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1927 bio->bi_end_io = end_io_func;
@@ -1944,6 +1946,7 @@ void set_page_extent_mapped(struct page *page)
1944 1946
1945static void set_page_extent_head(struct page *page, unsigned long len) 1947static void set_page_extent_head(struct page *page, unsigned long len)
1946{ 1948{
1949 WARN_ON(!PagePrivate(page));
1947 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1950 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1948} 1951}
1949 1952
@@ -2126,7 +2129,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2126 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2129 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2127 &bio_flags); 2130 &bio_flags);
2128 if (bio) 2131 if (bio)
2129 submit_one_bio(READ, bio, 0, bio_flags); 2132 ret = submit_one_bio(READ, bio, 0, bio_flags);
2130 return ret; 2133 return ret;
2131} 2134}
2132 2135
@@ -2819,9 +2822,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2819 * at this point we can safely clear everything except the 2822 * at this point we can safely clear everything except the
2820 * locked bit and the nodatasum bit 2823 * locked bit and the nodatasum bit
2821 */ 2824 */
2822 clear_extent_bit(tree, start, end, 2825 ret = clear_extent_bit(tree, start, end,
2823 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2826 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2824 0, 0, NULL, mask); 2827 0, 0, NULL, mask);
2828
2829 /* if clear_extent_bit failed for enomem reasons,
2830 * we can't allow the release to continue.
2831 */
2832 if (ret < 0)
2833 ret = 0;
2834 else
2835 ret = 1;
2825 } 2836 }
2826 return ret; 2837 return ret;
2827} 2838}
@@ -3192,7 +3203,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3192 } 3203 }
3193 if (!PageUptodate(p)) 3204 if (!PageUptodate(p))
3194 uptodate = 0; 3205 uptodate = 0;
3195 unlock_page(p); 3206
3207 /*
3208 * see below about how we avoid a nasty race with release page
3209 * and why we unlock later
3210 */
3211 if (i != 0)
3212 unlock_page(p);
3196 } 3213 }
3197 if (uptodate) 3214 if (uptodate)
3198 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3215 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3233,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3216 atomic_inc(&eb->refs); 3233 atomic_inc(&eb->refs);
3217 spin_unlock(&tree->buffer_lock); 3234 spin_unlock(&tree->buffer_lock);
3218 radix_tree_preload_end(); 3235 radix_tree_preload_end();
3236
3237 /*
3238 * there is a race where release page may have
3239 * tried to find this extent buffer in the radix
3240 * but failed. It will tell the VM it is safe to
3241 * reclaim the, and it will clear the page private bit.
3242 * We must make sure to set the page private bit properly
3243 * after the extent buffer is in the radix tree so
3244 * it doesn't get lost
3245 */
3246 set_page_extent_mapped(eb->first_page);
3247 set_page_extent_head(eb->first_page, eb->len);
3248 if (!page0)
3249 unlock_page(eb->first_page);
3219 return eb; 3250 return eb;
3220 3251
3221free_eb: 3252free_eb:
3253 if (eb->first_page && !page0)
3254 unlock_page(eb->first_page);
3255
3222 if (!atomic_dec_and_test(&eb->refs)) 3256 if (!atomic_dec_and_test(&eb->refs))
3223 return exists; 3257 return exists;
3224 btrfs_release_extent_buffer(eb); 3258 btrfs_release_extent_buffer(eb);
@@ -3269,10 +3303,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3269 continue; 3303 continue;
3270 3304
3271 lock_page(page); 3305 lock_page(page);
3306 WARN_ON(!PagePrivate(page));
3307
3308 set_page_extent_mapped(page);
3272 if (i == 0) 3309 if (i == 0)
3273 set_page_extent_head(page, eb->len); 3310 set_page_extent_head(page, eb->len);
3274 else
3275 set_page_private(page, EXTENT_PAGE_PRIVATE);
3276 3311
3277 clear_page_dirty_for_io(page); 3312 clear_page_dirty_for_io(page);
3278 spin_lock_irq(&page->mapping->tree_lock); 3313 spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3497,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3462 3497
3463 for (i = start_i; i < num_pages; i++) { 3498 for (i = start_i; i < num_pages; i++) {
3464 page = extent_buffer_page(eb, i); 3499 page = extent_buffer_page(eb, i);
3500
3501 WARN_ON(!PagePrivate(page));
3502
3503 set_page_extent_mapped(page);
3504 if (i == 0)
3505 set_page_extent_head(page, eb->len);
3506
3465 if (inc_all_pages) 3507 if (inc_all_pages)
3466 page_cache_get(page); 3508 page_cache_get(page);
3467 if (!PageUptodate(page)) { 3509 if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
51{ 51{
52 struct extent_map *em; 52 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 53 em = kmem_cache_alloc(extent_map_cache, mask);
54 if (!em || IS_ERR(em)) 54 if (!em)
55 return em; 55 return NULL;
56 em->in_tree = 0; 56 em->in_tree = 0;
57 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE; 58 em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a250ae77..4f19a3e1bf32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
536 root = root->fs_info->csum_root; 536 root = root->fs_info->csum_root;
537 537
538 path = btrfs_alloc_path(); 538 path = btrfs_alloc_path();
539 if (!path)
540 return -ENOMEM;
539 541
540 while (1) { 542 while (1) {
541 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 543 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
548 if (path->slots[0] == 0) 550 if (path->slots[0] == 0)
549 goto out; 551 goto out;
550 path->slots[0]--; 552 path->slots[0]--;
553 } else if (ret < 0) {
554 goto out;
551 } 555 }
556
552 leaf = path->nodes[0]; 557 leaf = path->nodes[0];
553 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 558 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
554 559
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58f3013..7084140d5940 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
186 split = alloc_extent_map(GFP_NOFS); 186 split = alloc_extent_map(GFP_NOFS);
187 if (!split2) 187 if (!split2)
188 split2 = alloc_extent_map(GFP_NOFS); 188 split2 = alloc_extent_map(GFP_NOFS);
189 BUG_ON(!split || !split2);
189 190
190 write_lock(&em_tree->lock); 191 write_lock(&em_tree->lock);
191 em = lookup_extent_mapping(em_tree, start, len); 192 em = lookup_extent_mapping(em_tree, start, len);
@@ -793,8 +794,12 @@ again:
793 for (i = 0; i < num_pages; i++) { 794 for (i = 0; i < num_pages; i++) {
794 pages[i] = grab_cache_page(inode->i_mapping, index + i); 795 pages[i] = grab_cache_page(inode->i_mapping, index + i);
795 if (!pages[i]) { 796 if (!pages[i]) {
796 err = -ENOMEM; 797 int c;
797 BUG_ON(1); 798 for (c = i - 1; c >= 0; c--) {
799 unlock_page(pages[c]);
800 page_cache_release(pages[c]);
801 }
802 return -ENOMEM;
798 } 803 }
799 wait_on_page_writeback(pages[i]); 804 wait_on_page_writeback(pages[i]);
800 } 805 }
@@ -946,6 +951,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
946 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 951 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
947 (sizeof(struct page *))); 952 (sizeof(struct page *)));
948 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 953 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
954 if (!pages) {
955 ret = -ENOMEM;
956 goto out;
957 }
949 958
950 /* generic_write_checks can change our pos */ 959 /* generic_write_checks can change our pos */
951 start_pos = pos; 960 start_pos = pos;
@@ -984,8 +993,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
984 size_t write_bytes = min(iov_iter_count(&i), 993 size_t write_bytes = min(iov_iter_count(&i),
985 nrptrs * (size_t)PAGE_CACHE_SIZE - 994 nrptrs * (size_t)PAGE_CACHE_SIZE -
986 offset); 995 offset);
987 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 996 size_t num_pages = (write_bytes + offset +
988 PAGE_CACHE_SHIFT; 997 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
989 998
990 WARN_ON(num_pages > nrptrs); 999 WARN_ON(num_pages > nrptrs);
991 memset(pages, 0, sizeof(struct page *) * nrptrs); 1000 memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1024,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1015 1024
1016 copied = btrfs_copy_from_user(pos, num_pages, 1025 copied = btrfs_copy_from_user(pos, num_pages,
1017 write_bytes, pages, &i); 1026 write_bytes, pages, &i);
1018 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> 1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
1019 PAGE_CACHE_SHIFT; 1028 PAGE_CACHE_SHIFT;
1020 1029
1021 if (num_pages > dirty_pages) { 1030 if (num_pages > dirty_pages) {
1022 if (copied > 0) 1031 if (copied > 0)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d684266959..a0390657451b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
987 return entry; 987 return entry;
988} 988}
989 989
990static void unlink_free_space(struct btrfs_block_group_cache *block_group, 990static inline void
991 struct btrfs_free_space *info) 991__unlink_free_space(struct btrfs_block_group_cache *block_group,
992 struct btrfs_free_space *info)
992{ 993{
993 rb_erase(&info->offset_index, &block_group->free_space_offset); 994 rb_erase(&info->offset_index, &block_group->free_space_offset);
994 block_group->free_extents--; 995 block_group->free_extents--;
996}
997
998static void unlink_free_space(struct btrfs_block_group_cache *block_group,
999 struct btrfs_free_space *info)
1000{
1001 __unlink_free_space(block_group, info);
995 block_group->free_space -= info->bytes; 1002 block_group->free_space -= info->bytes;
996} 1003}
997 1004
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1016 u64 max_bytes; 1023 u64 max_bytes;
1017 u64 bitmap_bytes; 1024 u64 bitmap_bytes;
1018 u64 extent_bytes; 1025 u64 extent_bytes;
1026 u64 size = block_group->key.offset;
1019 1027
1020 /* 1028 /*
1021 * The goal is to keep the total amount of memory used per 1gb of space 1029 * The goal is to keep the total amount of memory used per 1gb of space
1022 * at or below 32k, so we need to adjust how much memory we allow to be 1030 * at or below 32k, so we need to adjust how much memory we allow to be
1023 * used by extent based free space tracking 1031 * used by extent based free space tracking
1024 */ 1032 */
1025 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1033 if (size < 1024 * 1024 * 1024)
1026 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 1034 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1035 else
1036 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1037 div64_u64(size, 1024 * 1024 * 1024);
1027 1038
1028 /* 1039 /*
1029 * we want to account for 1 more bitmap than what we have so we can make 1040 * we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
1171 recalculate_thresholds(block_group); 1182 recalculate_thresholds(block_group);
1172} 1183}
1173 1184
1185static void free_bitmap(struct btrfs_block_group_cache *block_group,
1186 struct btrfs_free_space *bitmap_info)
1187{
1188 unlink_free_space(block_group, bitmap_info);
1189 kfree(bitmap_info->bitmap);
1190 kfree(bitmap_info);
1191 block_group->total_bitmaps--;
1192 recalculate_thresholds(block_group);
1193}
1194
1174static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1195static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
1175 struct btrfs_free_space *bitmap_info, 1196 struct btrfs_free_space *bitmap_info,
1176 u64 *offset, u64 *bytes) 1197 u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
1195 */ 1216 */
1196 search_start = *offset; 1217 search_start = *offset;
1197 search_bytes = *bytes; 1218 search_bytes = *bytes;
1219 search_bytes = min(search_bytes, end - search_start + 1);
1198 ret = search_bitmap(block_group, bitmap_info, &search_start, 1220 ret = search_bitmap(block_group, bitmap_info, &search_start,
1199 &search_bytes); 1221 &search_bytes);
1200 BUG_ON(ret < 0 || search_start != *offset); 1222 BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
1211 1233
1212 if (*bytes) { 1234 if (*bytes) {
1213 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1235 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1214 if (!bitmap_info->bytes) { 1236 if (!bitmap_info->bytes)
1215 unlink_free_space(block_group, bitmap_info); 1237 free_bitmap(block_group, bitmap_info);
1216 kfree(bitmap_info->bitmap);
1217 kfree(bitmap_info);
1218 block_group->total_bitmaps--;
1219 recalculate_thresholds(block_group);
1220 }
1221 1238
1222 /* 1239 /*
1223 * no entry after this bitmap, but we still have bytes to 1240 * no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
1250 return -EAGAIN; 1267 return -EAGAIN;
1251 1268
1252 goto again; 1269 goto again;
1253 } else if (!bitmap_info->bytes) { 1270 } else if (!bitmap_info->bytes)
1254 unlink_free_space(block_group, bitmap_info); 1271 free_bitmap(block_group, bitmap_info);
1255 kfree(bitmap_info->bitmap);
1256 kfree(bitmap_info);
1257 block_group->total_bitmaps--;
1258 recalculate_thresholds(block_group);
1259 }
1260 1272
1261 return 0; 1273 return 0;
1262} 1274}
@@ -1359,22 +1371,14 @@ out:
1359 return ret; 1371 return ret;
1360} 1372}
1361 1373
1362int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1374bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1363 u64 offset, u64 bytes) 1375 struct btrfs_free_space *info, bool update_stat)
1364{ 1376{
1365 struct btrfs_free_space *right_info = NULL; 1377 struct btrfs_free_space *left_info;
1366 struct btrfs_free_space *left_info = NULL; 1378 struct btrfs_free_space *right_info;
1367 struct btrfs_free_space *info = NULL; 1379 bool merged = false;
1368 int ret = 0; 1380 u64 offset = info->offset;
1369 1381 u64 bytes = info->bytes;
1370 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1371 if (!info)
1372 return -ENOMEM;
1373
1374 info->offset = offset;
1375 info->bytes = bytes;
1376
1377 spin_lock(&block_group->tree_lock);
1378 1382
1379 /* 1383 /*
1380 * first we want to see if there is free space adjacent to the range we 1384 * first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1388 else 1392 else
1389 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1393 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
1390 1394
1391 /*
1392 * If there was no extent directly to the left or right of this new
1393 * extent then we know we're going to have to allocate a new extent, so
1394 * before we do that see if we need to drop this into a bitmap
1395 */
1396 if ((!left_info || left_info->bitmap) &&
1397 (!right_info || right_info->bitmap)) {
1398 ret = insert_into_bitmap(block_group, info);
1399
1400 if (ret < 0) {
1401 goto out;
1402 } else if (ret) {
1403 ret = 0;
1404 goto out;
1405 }
1406 }
1407
1408 if (right_info && !right_info->bitmap) { 1395 if (right_info && !right_info->bitmap) {
1409 unlink_free_space(block_group, right_info); 1396 if (update_stat)
1397 unlink_free_space(block_group, right_info);
1398 else
1399 __unlink_free_space(block_group, right_info);
1410 info->bytes += right_info->bytes; 1400 info->bytes += right_info->bytes;
1411 kfree(right_info); 1401 kfree(right_info);
1402 merged = true;
1412 } 1403 }
1413 1404
1414 if (left_info && !left_info->bitmap && 1405 if (left_info && !left_info->bitmap &&
1415 left_info->offset + left_info->bytes == offset) { 1406 left_info->offset + left_info->bytes == offset) {
1416 unlink_free_space(block_group, left_info); 1407 if (update_stat)
1408 unlink_free_space(block_group, left_info);
1409 else
1410 __unlink_free_space(block_group, left_info);
1417 info->offset = left_info->offset; 1411 info->offset = left_info->offset;
1418 info->bytes += left_info->bytes; 1412 info->bytes += left_info->bytes;
1419 kfree(left_info); 1413 kfree(left_info);
1414 merged = true;
1420 } 1415 }
1421 1416
1417 return merged;
1418}
1419
1420int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1421 u64 offset, u64 bytes)
1422{
1423 struct btrfs_free_space *info;
1424 int ret = 0;
1425
1426 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1427 if (!info)
1428 return -ENOMEM;
1429
1430 info->offset = offset;
1431 info->bytes = bytes;
1432
1433 spin_lock(&block_group->tree_lock);
1434
1435 if (try_merge_free_space(block_group, info, true))
1436 goto link;
1437
1438 /*
1439 * There was no extent directly to the left or right of this new
1440 * extent then we know we're going to have to allocate a new extent, so
1441 * before we do that see if we need to drop this into a bitmap
1442 */
1443 ret = insert_into_bitmap(block_group, info);
1444 if (ret < 0) {
1445 goto out;
1446 } else if (ret) {
1447 ret = 0;
1448 goto out;
1449 }
1450link:
1422 ret = link_free_space(block_group, info); 1451 ret = link_free_space(block_group, info);
1423 if (ret) 1452 if (ret)
1424 kfree(info); 1453 kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
1621 node = rb_next(&entry->offset_index); 1650 node = rb_next(&entry->offset_index);
1622 rb_erase(&entry->offset_index, &cluster->root); 1651 rb_erase(&entry->offset_index, &cluster->root);
1623 BUG_ON(entry->bitmap); 1652 BUG_ON(entry->bitmap);
1653 try_merge_free_space(block_group, entry, false);
1624 tree_insert_offset(&block_group->free_space_offset, 1654 tree_insert_offset(&block_group->free_space_offset,
1625 entry->offset, &entry->offset_index, 0); 1655 entry->offset, &entry->offset_index, 0);
1626 } 1656 }
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1685 ret = offset; 1715 ret = offset;
1686 if (entry->bitmap) { 1716 if (entry->bitmap) {
1687 bitmap_clear_bits(block_group, entry, offset, bytes); 1717 bitmap_clear_bits(block_group, entry, offset, bytes);
1688 if (!entry->bytes) { 1718 if (!entry->bytes)
1689 unlink_free_space(block_group, entry); 1719 free_bitmap(block_group, entry);
1690 kfree(entry->bitmap);
1691 kfree(entry);
1692 block_group->total_bitmaps--;
1693 recalculate_thresholds(block_group);
1694 }
1695 } else { 1720 } else {
1696 unlink_free_space(block_group, entry); 1721 unlink_free_space(block_group, entry);
1697 entry->offset += bytes; 1722 entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1789 1814
1790 ret = search_start; 1815 ret = search_start;
1791 bitmap_clear_bits(block_group, entry, ret, bytes); 1816 bitmap_clear_bits(block_group, entry, ret, bytes);
1817 if (entry->bytes == 0)
1818 free_bitmap(block_group, entry);
1792out: 1819out:
1793 spin_unlock(&cluster->lock); 1820 spin_unlock(&cluster->lock);
1794 spin_unlock(&block_group->tree_lock); 1821 spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1842 entry->offset += bytes; 1869 entry->offset += bytes;
1843 entry->bytes -= bytes; 1870 entry->bytes -= bytes;
1844 1871
1845 if (entry->bytes == 0) { 1872 if (entry->bytes == 0)
1846 rb_erase(&entry->offset_index, &cluster->root); 1873 rb_erase(&entry->offset_index, &cluster->root);
1847 kfree(entry);
1848 }
1849 break; 1874 break;
1850 } 1875 }
1851out: 1876out:
1852 spin_unlock(&cluster->lock); 1877 spin_unlock(&cluster->lock);
1853 1878
1879 if (!ret)
1880 return 0;
1881
1882 spin_lock(&block_group->tree_lock);
1883
1884 block_group->free_space -= bytes;
1885 if (entry->bytes == 0) {
1886 block_group->free_extents--;
1887 kfree(entry);
1888 }
1889
1890 spin_unlock(&block_group->tree_lock);
1891
1854 return ret; 1892 return ret;
1855} 1893}
1856 1894
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b3e132..fb9bd7832b6d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
416 } 416 }
417 if (start == 0) { 417 if (start == 0) {
418 trans = btrfs_join_transaction(root, 1); 418 trans = btrfs_join_transaction(root, 1);
419 BUG_ON(!trans); 419 BUG_ON(IS_ERR(trans));
420 btrfs_set_trans_block_group(trans, inode); 420 btrfs_set_trans_block_group(trans, inode);
421 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 421 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
422 422
@@ -612,6 +612,7 @@ retry:
612 GFP_NOFS); 612 GFP_NOFS);
613 613
614 trans = btrfs_join_transaction(root, 1); 614 trans = btrfs_join_transaction(root, 1);
615 BUG_ON(IS_ERR(trans));
615 ret = btrfs_reserve_extent(trans, root, 616 ret = btrfs_reserve_extent(trans, root,
616 async_extent->compressed_size, 617 async_extent->compressed_size,
617 async_extent->compressed_size, 618 async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
643 async_extent->ram_size - 1, 0); 644 async_extent->ram_size - 1, 0);
644 645
645 em = alloc_extent_map(GFP_NOFS); 646 em = alloc_extent_map(GFP_NOFS);
647 BUG_ON(!em);
646 em->start = async_extent->start; 648 em->start = async_extent->start;
647 em->len = async_extent->ram_size; 649 em->len = async_extent->ram_size;
648 em->orig_start = em->start; 650 em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
771 773
772 BUG_ON(root == root->fs_info->tree_root); 774 BUG_ON(root == root->fs_info->tree_root);
773 trans = btrfs_join_transaction(root, 1); 775 trans = btrfs_join_transaction(root, 1);
774 BUG_ON(!trans); 776 BUG_ON(IS_ERR(trans));
775 btrfs_set_trans_block_group(trans, inode); 777 btrfs_set_trans_block_group(trans, inode);
776 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 778 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
777 779
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
819 BUG_ON(ret); 821 BUG_ON(ret);
820 822
821 em = alloc_extent_map(GFP_NOFS); 823 em = alloc_extent_map(GFP_NOFS);
824 BUG_ON(!em);
822 em->start = start; 825 em->start = start;
823 em->orig_start = em->start; 826 em->orig_start = em->start;
824 ram_size = ins.offset; 827 ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1049 } else { 1052 } else {
1050 trans = btrfs_join_transaction(root, 1); 1053 trans = btrfs_join_transaction(root, 1);
1051 } 1054 }
1052 BUG_ON(!trans); 1055 BUG_ON(IS_ERR(trans));
1053 1056
1054 cow_start = (u64)-1; 1057 cow_start = (u64)-1;
1055 cur_offset = start; 1058 cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
1168 struct extent_map_tree *em_tree; 1171 struct extent_map_tree *em_tree;
1169 em_tree = &BTRFS_I(inode)->extent_tree; 1172 em_tree = &BTRFS_I(inode)->extent_tree;
1170 em = alloc_extent_map(GFP_NOFS); 1173 em = alloc_extent_map(GFP_NOFS);
1174 BUG_ON(!em);
1171 em->start = cur_offset; 1175 em->start = cur_offset;
1172 em->orig_start = em->start; 1176 em->orig_start = em->start;
1173 em->len = num_bytes; 1177 em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
1557out_page: 1561out_page:
1558 unlock_page(page); 1562 unlock_page(page);
1559 page_cache_release(page); 1563 page_cache_release(page);
1564 kfree(fixup);
1560} 1565}
1561 1566
1562/* 1567/*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1703 trans = btrfs_join_transaction_nolock(root, 1); 1708 trans = btrfs_join_transaction_nolock(root, 1);
1704 else 1709 else
1705 trans = btrfs_join_transaction(root, 1); 1710 trans = btrfs_join_transaction(root, 1);
1706 BUG_ON(!trans); 1711 BUG_ON(IS_ERR(trans));
1707 btrfs_set_trans_block_group(trans, inode); 1712 btrfs_set_trans_block_group(trans, inode);
1708 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1713 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1709 ret = btrfs_update_inode(trans, root, inode); 1714 ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1720 trans = btrfs_join_transaction_nolock(root, 1); 1725 trans = btrfs_join_transaction_nolock(root, 1);
1721 else 1726 else
1722 trans = btrfs_join_transaction(root, 1); 1727 trans = btrfs_join_transaction(root, 1);
1728 BUG_ON(IS_ERR(trans));
1723 btrfs_set_trans_block_group(trans, inode); 1729 btrfs_set_trans_block_group(trans, inode);
1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1730 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1725 1731
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2354 */ 2360 */
2355 if (is_bad_inode(inode)) { 2361 if (is_bad_inode(inode)) {
2356 trans = btrfs_start_transaction(root, 0); 2362 trans = btrfs_start_transaction(root, 0);
2363 BUG_ON(IS_ERR(trans));
2357 btrfs_orphan_del(trans, inode); 2364 btrfs_orphan_del(trans, inode);
2358 btrfs_end_transaction(trans, root); 2365 btrfs_end_transaction(trans, root);
2359 iput(inode); 2366 iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2381 2388
2382 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2389 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2383 trans = btrfs_join_transaction(root, 1); 2390 trans = btrfs_join_transaction(root, 1);
2391 BUG_ON(IS_ERR(trans));
2384 btrfs_end_transaction(trans, root); 2392 btrfs_end_transaction(trans, root);
2385 } 2393 }
2386 2394
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2641 path = btrfs_alloc_path(); 2649 path = btrfs_alloc_path();
2642 if (!path) { 2650 if (!path) {
2643 ret = -ENOMEM; 2651 ret = -ENOMEM;
2644 goto err; 2652 goto out;
2645 } 2653 }
2646 2654
2647 path->leave_spinning = 1; 2655 path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
2714 struct extent_buffer *eb; 2722 struct extent_buffer *eb;
2715 int level; 2723 int level;
2716 u64 refs = 1; 2724 u64 refs = 1;
2717 int uninitialized_var(ret);
2718 2725
2719 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2726 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2727 int ret;
2728
2720 if (!path->nodes[level]) 2729 if (!path->nodes[level])
2721 break; 2730 break;
2722 eb = path->nodes[level]; 2731 eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
2727 if (refs > 1) 2736 if (refs > 1)
2728 return 1; 2737 return 1;
2729 } 2738 }
2730 return ret; /* XXX callers? */ 2739 return 0;
2731} 2740}
2732 2741
2733/* 2742/*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4134 } 4143 }
4135 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 4144 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
4136 4145
4137 if (root != sub_root) { 4146 if (!IS_ERR(inode) && root != sub_root) {
4138 down_read(&root->fs_info->cleanup_work_sem); 4147 down_read(&root->fs_info->cleanup_work_sem);
4139 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4148 if (!(inode->i_sb->s_flags & MS_RDONLY))
4140 btrfs_orphan_cleanup(sub_root); 4149 btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4347 trans = btrfs_join_transaction_nolock(root, 1); 4356 trans = btrfs_join_transaction_nolock(root, 1);
4348 else 4357 else
4349 trans = btrfs_join_transaction(root, 1); 4358 trans = btrfs_join_transaction(root, 1);
4359 if (IS_ERR(trans))
4360 return PTR_ERR(trans);
4350 btrfs_set_trans_block_group(trans, inode); 4361 btrfs_set_trans_block_group(trans, inode);
4351 if (nolock) 4362 if (nolock)
4352 ret = btrfs_end_transaction_nolock(trans, root); 4363 ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
4372 return; 4383 return;
4373 4384
4374 trans = btrfs_join_transaction(root, 1); 4385 trans = btrfs_join_transaction(root, 1);
4386 BUG_ON(IS_ERR(trans));
4375 btrfs_set_trans_block_group(trans, inode); 4387 btrfs_set_trans_block_group(trans, inode);
4376 4388
4377 ret = btrfs_update_inode(trans, root, inode); 4389 ret = btrfs_update_inode(trans, root, inode);
@@ -5176,6 +5188,8 @@ again:
5176 em = NULL; 5188 em = NULL;
5177 btrfs_release_path(root, path); 5189 btrfs_release_path(root, path);
5178 trans = btrfs_join_transaction(root, 1); 5190 trans = btrfs_join_transaction(root, 1);
5191 if (IS_ERR(trans))
5192 return ERR_CAST(trans);
5179 goto again; 5193 goto again;
5180 } 5194 }
5181 map = kmap(page); 5195 map = kmap(page);
@@ -5280,8 +5294,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5280 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5294 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5281 5295
5282 trans = btrfs_join_transaction(root, 0); 5296 trans = btrfs_join_transaction(root, 0);
5283 if (!trans) 5297 if (IS_ERR(trans))
5284 return ERR_PTR(-ENOMEM); 5298 return ERR_CAST(trans);
5285 5299
5286 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 5300 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5287 5301
@@ -5505,7 +5519,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5505 * while we look for nocow cross refs 5519 * while we look for nocow cross refs
5506 */ 5520 */
5507 trans = btrfs_join_transaction(root, 0); 5521 trans = btrfs_join_transaction(root, 0);
5508 if (!trans) 5522 if (IS_ERR(trans))
5509 goto must_cow; 5523 goto must_cow;
5510 5524
5511 if (can_nocow_odirect(trans, inode, start, len) == 1) { 5525 if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5654,7 @@ again:
5640 BUG_ON(!ordered); 5654 BUG_ON(!ordered);
5641 5655
5642 trans = btrfs_join_transaction(root, 1); 5656 trans = btrfs_join_transaction(root, 1);
5643 if (!trans) { 5657 if (IS_ERR(trans)) {
5644 err = -ENOMEM; 5658 err = -ENOMEM;
5645 goto out; 5659 goto out;
5646 } 5660 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22b522a..be2d4f6aaa5e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
203 203
204 204
205 trans = btrfs_join_transaction(root, 1); 205 trans = btrfs_join_transaction(root, 1);
206 BUG_ON(!trans); 206 BUG_ON(IS_ERR(trans));
207 207
208 ret = btrfs_update_inode(trans, root, inode); 208 ret = btrfs_update_inode(trans, root, inode);
209 BUG_ON(ret); 209 BUG_ON(ret);
@@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
907 907
908 if (new_size > old_size) { 908 if (new_size > old_size) {
909 trans = btrfs_start_transaction(root, 0); 909 trans = btrfs_start_transaction(root, 0);
910 if (IS_ERR(trans)) {
911 ret = PTR_ERR(trans);
912 goto out_unlock;
913 }
910 ret = btrfs_grow_device(trans, device, new_size); 914 ret = btrfs_grow_device(trans, device, new_size);
911 btrfs_commit_transaction(trans, root); 915 btrfs_commit_transaction(trans, root);
912 } else { 916 } else {
@@ -1898,7 +1902,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1898 1902
1899 memcpy(&new_key, &key, sizeof(new_key)); 1903 memcpy(&new_key, &key, sizeof(new_key));
1900 new_key.objectid = inode->i_ino; 1904 new_key.objectid = inode->i_ino;
1901 new_key.offset = key.offset + destoff - off; 1905 if (off <= key.offset)
1906 new_key.offset = key.offset + destoff - off;
1907 else
1908 new_key.offset = destoff;
1902 1909
1903 trans = btrfs_start_transaction(root, 1); 1910 trans = btrfs_start_transaction(root, 1);
1904 if (IS_ERR(trans)) { 1911 if (IS_ERR(trans)) {
@@ -2082,7 +2089,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2082 2089
2083 ret = -ENOMEM; 2090 ret = -ENOMEM;
2084 trans = btrfs_start_ioctl_transaction(root, 0); 2091 trans = btrfs_start_ioctl_transaction(root, 0);
2085 if (!trans) 2092 if (IS_ERR(trans))
2086 goto out_drop; 2093 goto out_drop;
2087 2094
2088 file->private_data = trans; 2095 file->private_data = trans;
@@ -2138,9 +2145,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2138 path->leave_spinning = 1; 2145 path->leave_spinning = 1;
2139 2146
2140 trans = btrfs_start_transaction(root, 1); 2147 trans = btrfs_start_transaction(root, 1);
2141 if (!trans) { 2148 if (IS_ERR(trans)) {
2142 btrfs_free_path(path); 2149 btrfs_free_path(path);
2143 return -ENOMEM; 2150 return PTR_ERR(trans);
2144 } 2151 }
2145 2152
2146 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 2153 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2208,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2201 int num_types = 4; 2208 int num_types = 4;
2202 int alloc_size; 2209 int alloc_size;
2203 int ret = 0; 2210 int ret = 0;
2204 int slot_count = 0; 2211 u64 slot_count = 0;
2205 int i, c; 2212 int i, c;
2206 2213
2207 if (copy_from_user(&space_args, 2214 if (copy_from_user(&space_args,
@@ -2240,7 +2247,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2240 goto out; 2247 goto out;
2241 } 2248 }
2242 2249
2243 slot_count = min_t(int, space_args.space_slots, slot_count); 2250 slot_count = min_t(u64, space_args.space_slots, slot_count);
2244 2251
2245 alloc_size = sizeof(*dest) * slot_count; 2252 alloc_size = sizeof(*dest) * slot_count;
2246 2253
@@ -2260,6 +2267,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2260 for (i = 0; i < num_types; i++) { 2267 for (i = 0; i < num_types; i++) {
2261 struct btrfs_space_info *tmp; 2268 struct btrfs_space_info *tmp;
2262 2269
2270 if (!slot_count)
2271 break;
2272
2263 info = NULL; 2273 info = NULL;
2264 rcu_read_lock(); 2274 rcu_read_lock();
2265 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2275 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2291,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2281 memcpy(dest, &space, sizeof(space)); 2291 memcpy(dest, &space, sizeof(space));
2282 dest++; 2292 dest++;
2283 space_args.total_spaces++; 2293 space_args.total_spaces++;
2294 slot_count--;
2284 } 2295 }
2296 if (!slot_count)
2297 break;
2285 } 2298 }
2286 up_read(&info->groups_sem); 2299 up_read(&info->groups_sem);
2287 } 2300 }
@@ -2334,6 +2347,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2334 u64 transid; 2347 u64 transid;
2335 2348
2336 trans = btrfs_start_transaction(root, 0); 2349 trans = btrfs_start_transaction(root, 0);
2350 if (IS_ERR(trans))
2351 return PTR_ERR(trans);
2337 transid = trans->transid; 2352 transid = trans->transid;
2338 btrfs_commit_transaction_async(trans, root, 0); 2353 btrfs_commit_transaction_async(trans, root, 0);
2339 2354
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1ddcd99..083a55477375 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
141 u64 file_offset) 141 u64 file_offset)
142{ 142{
143 struct rb_root *root = &tree->tree; 143 struct rb_root *root = &tree->tree;
144 struct rb_node *prev; 144 struct rb_node *prev = NULL;
145 struct rb_node *ret; 145 struct rb_node *ret;
146 struct btrfs_ordered_extent *entry; 146 struct btrfs_ordered_extent *entry;
147 147
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be22b63..fb2605d998e9 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
260#else 260#else
261 BUG(); 261 BUG();
262#endif 262#endif
263 break;
263 case BTRFS_BLOCK_GROUP_ITEM_KEY: 264 case BTRFS_BLOCK_GROUP_ITEM_KEY:
264 bi = btrfs_item_ptr(l, i, 265 bi = btrfs_item_ptr(l, i,
265 struct btrfs_block_group_item); 266 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2b2d7e..0825e4ed9447 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1157 new_node->bytenr = dest->node->start; 1157 new_node->bytenr = dest->node->start;
1158 new_node->level = node->level; 1158 new_node->level = node->level;
1159 new_node->lowest = node->lowest; 1159 new_node->lowest = node->lowest;
1160 new_node->checked = 1;
1160 new_node->root = dest; 1161 new_node->root = dest;
1161 1162
1162 if (!node->lowest) { 1163 if (!node->lowest) {
@@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2028 2029
2029 while (1) { 2030 while (1) {
2030 trans = btrfs_start_transaction(root, 0); 2031 trans = btrfs_start_transaction(root, 0);
2032 BUG_ON(IS_ERR(trans));
2031 trans->block_rsv = rc->block_rsv; 2033 trans->block_rsv = rc->block_rsv;
2032 2034
2033 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, 2035 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2149,12 @@ again:
2147 } 2149 }
2148 2150
2149 trans = btrfs_join_transaction(rc->extent_root, 1); 2151 trans = btrfs_join_transaction(rc->extent_root, 1);
2152 if (IS_ERR(trans)) {
2153 if (!err)
2154 btrfs_block_rsv_release(rc->extent_root,
2155 rc->block_rsv, num_bytes);
2156 return PTR_ERR(trans);
2157 }
2150 2158
2151 if (!err) { 2159 if (!err) {
2152 if (num_bytes != rc->merging_rsv_size) { 2160 if (num_bytes != rc->merging_rsv_size) {
@@ -3222,6 +3230,7 @@ truncate:
3222 trans = btrfs_join_transaction(root, 0); 3230 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) { 3231 if (IS_ERR(trans)) {
3224 btrfs_free_path(path); 3232 btrfs_free_path(path);
3233 ret = PTR_ERR(trans);
3225 goto out; 3234 goto out;
3226 } 3235 }
3227 3236
@@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3628 set_reloc_control(rc); 3637 set_reloc_control(rc);
3629 3638
3630 trans = btrfs_join_transaction(rc->extent_root, 1); 3639 trans = btrfs_join_transaction(rc->extent_root, 1);
3640 BUG_ON(IS_ERR(trans));
3631 btrfs_commit_transaction(trans, rc->extent_root); 3641 btrfs_commit_transaction(trans, rc->extent_root);
3632 return 0; 3642 return 0;
3633} 3643}
@@ -3657,6 +3667,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3657 3667
3658 while (1) { 3668 while (1) {
3659 trans = btrfs_start_transaction(rc->extent_root, 0); 3669 trans = btrfs_start_transaction(rc->extent_root, 0);
3670 BUG_ON(IS_ERR(trans));
3660 3671
3661 if (update_backref_cache(trans, &rc->backref_cache)) { 3672 if (update_backref_cache(trans, &rc->backref_cache)) {
3662 btrfs_end_transaction(trans, rc->extent_root); 3673 btrfs_end_transaction(trans, rc->extent_root);
@@ -3804,7 +3815,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3804 3815
3805 /* get rid of pinned extents */ 3816 /* get rid of pinned extents */
3806 trans = btrfs_join_transaction(rc->extent_root, 1); 3817 trans = btrfs_join_transaction(rc->extent_root, 1);
3807 btrfs_commit_transaction(trans, rc->extent_root); 3818 if (IS_ERR(trans))
3819 err = PTR_ERR(trans);
3820 else
3821 btrfs_commit_transaction(trans, rc->extent_root);
3808out_free: 3822out_free:
3809 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 3823 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3810 btrfs_free_path(path); 3824 btrfs_free_path(path);
@@ -4022,6 +4036,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4022 int ret; 4036 int ret;
4023 4037
4024 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4038 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4039 BUG_ON(IS_ERR(trans));
4025 4040
4026 memset(&root->root_item.drop_progress, 0, 4041 memset(&root->root_item.drop_progress, 0,
4027 sizeof(root->root_item.drop_progress)); 4042 sizeof(root->root_item.drop_progress));
@@ -4125,6 +4140,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4125 set_reloc_control(rc); 4140 set_reloc_control(rc);
4126 4141
4127 trans = btrfs_join_transaction(rc->extent_root, 1); 4142 trans = btrfs_join_transaction(rc->extent_root, 1);
4143 if (IS_ERR(trans)) {
4144 unset_reloc_control(rc);
4145 err = PTR_ERR(trans);
4146 goto out_free;
4147 }
4128 4148
4129 rc->merge_reloc_tree = 1; 4149 rc->merge_reloc_tree = 1;
4130 4150
@@ -4154,9 +4174,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4154 unset_reloc_control(rc); 4174 unset_reloc_control(rc);
4155 4175
4156 trans = btrfs_join_transaction(rc->extent_root, 1); 4176 trans = btrfs_join_transaction(rc->extent_root, 1);
4157 btrfs_commit_transaction(trans, rc->extent_root); 4177 if (IS_ERR(trans))
4158out: 4178 err = PTR_ERR(trans);
4179 else
4180 btrfs_commit_transaction(trans, rc->extent_root);
4181out_free:
4159 kfree(rc); 4182 kfree(rc);
4183out:
4160 while (!list_empty(&reloc_roots)) { 4184 while (!list_empty(&reloc_roots)) {
4161 reloc_root = list_entry(reloc_roots.next, 4185 reloc_root = list_entry(reloc_roots.next,
4162 struct btrfs_root, root_list); 4186 struct btrfs_root, root_list);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c46fdb5..a004008f7d28 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -383,7 +383,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
383 struct btrfs_fs_devices **fs_devices) 383 struct btrfs_fs_devices **fs_devices)
384{ 384{
385 substring_t args[MAX_OPT_ARGS]; 385 substring_t args[MAX_OPT_ARGS];
386 char *opts, *p; 386 char *opts, *orig, *p;
387 int error = 0; 387 int error = 0;
388 int intarg; 388 int intarg;
389 389
@@ -397,6 +397,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
397 opts = kstrdup(options, GFP_KERNEL); 397 opts = kstrdup(options, GFP_KERNEL);
398 if (!opts) 398 if (!opts)
399 return -ENOMEM; 399 return -ENOMEM;
400 orig = opts;
400 401
401 while ((p = strsep(&opts, ",")) != NULL) { 402 while ((p = strsep(&opts, ",")) != NULL) {
402 int token; 403 int token;
@@ -432,7 +433,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
432 } 433 }
433 434
434 out_free_opts: 435 out_free_opts:
435 kfree(opts); 436 kfree(orig);
436 out: 437 out:
437 /* 438 /*
438 * If no subvolume name is specified we use the default one. Allocate 439 * If no subvolume name is specified we use the default one. Allocate
@@ -623,6 +624,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
623 btrfs_wait_ordered_extents(root, 0, 0); 624 btrfs_wait_ordered_extents(root, 0, 0);
624 625
625 trans = btrfs_start_transaction(root, 0); 626 trans = btrfs_start_transaction(root, 0);
627 if (IS_ERR(trans))
628 return PTR_ERR(trans);
626 ret = btrfs_commit_transaction(trans, root); 629 ret = btrfs_commit_transaction(trans, root);
627 return ret; 630 return ret;
628} 631}
@@ -761,6 +764,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
761 } 764 }
762 765
763 btrfs_close_devices(fs_devices); 766 btrfs_close_devices(fs_devices);
767 kfree(fs_info);
768 kfree(tree_root);
764 } else { 769 } else {
765 char b[BDEVNAME_SIZE]; 770 char b[BDEVNAME_SIZE];
766 771
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b8bbe2..3d73c8d93bbb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1161 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1161 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1162 ac->root = root; 1162 ac->root = root;
1163 ac->newtrans = btrfs_join_transaction(root, 0); 1163 ac->newtrans = btrfs_join_transaction(root, 0);
1164 if (IS_ERR(ac->newtrans)) {
1165 int err = PTR_ERR(ac->newtrans);
1166 kfree(ac);
1167 return err;
1168 }
1164 1169
1165 /* take transaction reference */ 1170 /* take transaction reference */
1166 mutex_lock(&root->fs_info->trans_mutex); 1171 mutex_lock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744ac5719..a4bbb854dfd2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path);
343 kfree(dst_copy);
344 kfree(src_copy);
345 return -ENOMEM;
346 }
341 347
342 read_extent_buffer(eb, src_copy, src_ptr, item_size); 348 read_extent_buffer(eb, src_copy, src_ptr, item_size);
343 349
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
665 btrfs_dir_item_key_to_cpu(leaf, di, &location); 671 btrfs_dir_item_key_to_cpu(leaf, di, &location);
666 name_len = btrfs_dir_name_len(leaf, di); 672 name_len = btrfs_dir_name_len(leaf, di);
667 name = kmalloc(name_len, GFP_NOFS); 673 name = kmalloc(name_len, GFP_NOFS);
674 if (!name)
675 return -ENOMEM;
676
668 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
669 btrfs_release_path(root, path); 678 btrfs_release_path(root, path);
670 679
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
744 int match = 0; 753 int match = 0;
745 754
746 path = btrfs_alloc_path(); 755 path = btrfs_alloc_path();
756 if (!path)
757 return -ENOMEM;
758
747 ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 759 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
748 if (ret != 0) 760 if (ret != 0)
749 goto out; 761 goto out;
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
967 key.offset = (u64)-1; 979 key.offset = (u64)-1;
968 980
969 path = btrfs_alloc_path(); 981 path = btrfs_alloc_path();
982 if (!path)
983 return -ENOMEM;
970 984
971 while (1) { 985 while (1) {
972 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 986 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1178 1192
1179 name_len = btrfs_dir_name_len(eb, di); 1193 name_len = btrfs_dir_name_len(eb, di);
1180 name = kmalloc(name_len, GFP_NOFS); 1194 name = kmalloc(name_len, GFP_NOFS);
1195 if (!name)
1196 return -ENOMEM;
1197
1181 log_type = btrfs_dir_type(eb, di); 1198 log_type = btrfs_dir_type(eb, di);
1182 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1199 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1183 name_len); 1200 name_len);
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1692 root_owner = btrfs_header_owner(parent); 1709 root_owner = btrfs_header_owner(parent);
1693 1710
1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1711 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1712 if (!next)
1713 return -ENOMEM;
1695 1714
1696 if (*level == 1) { 1715 if (*level == 1) {
1697 wc->process_func(root, next, wc, ptr_gen); 1716 wc->process_func(root, next, wc, ptr_gen);
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2032 wait_log_commit(trans, log_root_tree, 2051 wait_log_commit(trans, log_root_tree,
2033 log_root_tree->log_transid); 2052 log_root_tree->log_transid);
2034 mutex_unlock(&log_root_tree->log_mutex); 2053 mutex_unlock(&log_root_tree->log_mutex);
2054 ret = 0;
2035 goto out; 2055 goto out;
2036 } 2056 }
2037 atomic_set(&log_root_tree->log_commit[index2], 1); 2057 atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2116,7 @@ out:
2096 smp_mb(); 2116 smp_mb();
2097 if (waitqueue_active(&root->log_commit_wait[index1])) 2117 if (waitqueue_active(&root->log_commit_wait[index1]))
2098 wake_up(&root->log_commit_wait[index1]); 2118 wake_up(&root->log_commit_wait[index1]);
2099 return 0; 2119 return ret;
2100} 2120}
2101 2121
2102static void free_log_tree(struct btrfs_trans_handle *trans, 2122static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2194 2214
2195 log = root->log_root; 2215 log = root->log_root;
2196 path = btrfs_alloc_path(); 2216 path = btrfs_alloc_path();
2217 if (!path)
2218 return -ENOMEM;
2219
2197 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2220 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2198 name, name_len, -1); 2221 name, name_len, -1);
2199 if (IS_ERR(di)) { 2222 if (IS_ERR(di)) {
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2594 2617
2595 ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 2618 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
2596 nr * sizeof(u32), GFP_NOFS); 2619 nr * sizeof(u32), GFP_NOFS);
2620 if (!ins_data)
2621 return -ENOMEM;
2622
2597 ins_sizes = (u32 *)ins_data; 2623 ins_sizes = (u32 *)ins_data;
2598 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 2624 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
2599 2625
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2725 log = root->log_root; 2751 log = root->log_root;
2726 2752
2727 path = btrfs_alloc_path(); 2753 path = btrfs_alloc_path();
2754 if (!path)
2755 return -ENOMEM;
2728 dst_path = btrfs_alloc_path(); 2756 dst_path = btrfs_alloc_path();
2757 if (!dst_path) {
2758 btrfs_free_path(path);
2759 return -ENOMEM;
2760 }
2729 2761
2730 min_key.objectid = inode->i_ino; 2762 min_key.objectid = inode->i_ino;
2731 min_key.type = BTRFS_INODE_ITEM_KEY; 2763 min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3080 BUG_ON(!path); 3112 BUG_ON(!path);
3081 3113
3082 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3114 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3115 BUG_ON(IS_ERR(trans));
3083 3116
3084 wc.trans = trans; 3117 wc.trans = trans;
3085 wc.pin = 1; 3118 wc.pin = 1;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530233b7..af7dbca15276 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
1213 return -ENOMEM; 1213 return -ENOMEM;
1214 1214
1215 trans = btrfs_start_transaction(root, 0); 1215 trans = btrfs_start_transaction(root, 0);
1216 if (IS_ERR(trans)) {
1217 btrfs_free_path(path);
1218 return PTR_ERR(trans);
1219 }
1216 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 1220 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1217 key.type = BTRFS_DEV_ITEM_KEY; 1221 key.type = BTRFS_DEV_ITEM_KEY;
1218 key.offset = device->devid; 1222 key.offset = device->devid;
@@ -1601,11 +1605,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1601 1605
1602 ret = find_next_devid(root, &device->devid); 1606 ret = find_next_devid(root, &device->devid);
1603 if (ret) { 1607 if (ret) {
1608 kfree(device->name);
1604 kfree(device); 1609 kfree(device);
1605 goto error; 1610 goto error;
1606 } 1611 }
1607 1612
1608 trans = btrfs_start_transaction(root, 0); 1613 trans = btrfs_start_transaction(root, 0);
1614 if (IS_ERR(trans)) {
1615 kfree(device->name);
1616 kfree(device);
1617 ret = PTR_ERR(trans);
1618 goto error;
1619 }
1620
1609 lock_chunks(root); 1621 lock_chunks(root);
1610 1622
1611 device->writeable = 1; 1623 device->writeable = 1;
@@ -1873,7 +1885,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1873 return ret; 1885 return ret;
1874 1886
1875 trans = btrfs_start_transaction(root, 0); 1887 trans = btrfs_start_transaction(root, 0);
1876 BUG_ON(!trans); 1888 BUG_ON(IS_ERR(trans));
1877 1889
1878 lock_chunks(root); 1890 lock_chunks(root);
1879 1891
@@ -2047,7 +2059,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2047 BUG_ON(ret); 2059 BUG_ON(ret);
2048 2060
2049 trans = btrfs_start_transaction(dev_root, 0); 2061 trans = btrfs_start_transaction(dev_root, 0);
2050 BUG_ON(!trans); 2062 BUG_ON(IS_ERR(trans));
2051 2063
2052 ret = btrfs_grow_device(trans, device, old_size); 2064 ret = btrfs_grow_device(trans, device, old_size);
2053 BUG_ON(ret); 2065 BUG_ON(ret);
@@ -2213,6 +2225,11 @@ again:
2213 2225
2214 /* Shrinking succeeded, else we would be at "done". */ 2226 /* Shrinking succeeded, else we would be at "done". */
2215 trans = btrfs_start_transaction(root, 0); 2227 trans = btrfs_start_transaction(root, 0);
2228 if (IS_ERR(trans)) {
2229 ret = PTR_ERR(trans);
2230 goto done;
2231 }
2232
2216 lock_chunks(root); 2233 lock_chunks(root);
2217 2234
2218 device->disk_total_bytes = new_size; 2235 device->disk_total_bytes = new_size;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 60d27bc9eb83..6b61ded701e1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1560,9 +1560,10 @@ retry_locked:
1560 /* NOTE: no side-effects allowed, until we take s_mutex */ 1560 /* NOTE: no side-effects allowed, until we take s_mutex */
1561 1561
1562 revoking = cap->implemented & ~cap->issued; 1562 revoking = cap->implemented & ~cap->issued;
1563 if (revoking) 1563 dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
1564 dout(" mds%d revoking %s\n", cap->mds, 1564 cap->mds, cap, ceph_cap_string(cap->issued),
1565 ceph_cap_string(revoking)); 1565 ceph_cap_string(cap->implemented),
1566 ceph_cap_string(revoking));
1566 1567
1567 if (cap == ci->i_auth_cap && 1568 if (cap == ci->i_auth_cap &&
1568 (cap->issued & CEPH_CAP_FILE_WR)) { 1569 (cap->issued & CEPH_CAP_FILE_WR)) {
@@ -1658,6 +1659,8 @@ ack:
1658 1659
1659 if (cap == ci->i_auth_cap && ci->i_dirty_caps) 1660 if (cap == ci->i_auth_cap && ci->i_dirty_caps)
1660 flushing = __mark_caps_flushing(inode, session); 1661 flushing = __mark_caps_flushing(inode, session);
1662 else
1663 flushing = 0;
1661 1664
1662 mds = cap->mds; /* remember mds, so we don't repeat */ 1665 mds = cap->mds; /* remember mds, so we don't repeat */
1663 sent++; 1666 sent++;
@@ -1940,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1940 } 1943 }
1941} 1944}
1942 1945
1946static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1947 struct ceph_mds_session *session,
1948 struct inode *inode)
1949{
1950 struct ceph_inode_info *ci = ceph_inode(inode);
1951 struct ceph_cap *cap;
1952 int delayed = 0;
1953
1954 spin_lock(&inode->i_lock);
1955 cap = ci->i_auth_cap;
1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
1958 __ceph_flush_snaps(ci, &session, 1);
1959 if (ci->i_flushing_caps) {
1960 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
1961 __ceph_caps_used(ci),
1962 __ceph_caps_wanted(ci),
1963 cap->issued | cap->implemented,
1964 ci->i_flushing_caps, NULL);
1965 if (delayed) {
1966 spin_lock(&inode->i_lock);
1967 __cap_delay_requeue(mdsc, ci);
1968 spin_unlock(&inode->i_lock);
1969 }
1970 } else {
1971 spin_unlock(&inode->i_lock);
1972 }
1973}
1974
1943 1975
1944/* 1976/*
1945 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
@@ -2687,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2687 ceph_add_cap(inode, session, cap_id, -1, 2719 ceph_add_cap(inode, session, cap_id, -1,
2688 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, 2720 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
2689 NULL /* no caps context */); 2721 NULL /* no caps context */);
2690 try_flush_caps(inode, session, NULL); 2722 kick_flushing_inode_caps(mdsc, session, inode);
2691 up_read(&mdsc->snap_rwsem); 2723 up_read(&mdsc->snap_rwsem);
2692 2724
2693 /* make sure we re-request max_size, if necessary */ 2725 /* make sure we re-request max_size, if necessary */
@@ -2785,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2785 case CEPH_CAP_OP_IMPORT: 2817 case CEPH_CAP_OP_IMPORT:
2786 handle_cap_import(mdsc, inode, h, session, 2818 handle_cap_import(mdsc, inode, h, session,
2787 snaptrace, snaptrace_len); 2819 snaptrace, snaptrace_len);
2788 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, 2820 ceph_check_caps(ceph_inode(inode), 0, session);
2789 session);
2790 goto done_unlocked; 2821 goto done_unlocked;
2791 } 2822 }
2792 2823
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e835eff551e3..5625463aa479 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -710,10 +710,6 @@ static int fill_inode(struct inode *inode,
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 ci->i_ceph_flags |= CEPH_I_COMPLETE;
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713
714 /* it may be better to set st_size in getattr instead? */
715 if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
716 inode->i_size = ci->i_rbytes;
717 break; 713 break;
718 default: 714 default:
719 pr_err("fill_inode %llx.%llx BAD mode 0%o\n", 715 pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -1819,7 +1815,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1819 else 1815 else
1820 stat->dev = 0; 1816 stat->dev = 0;
1821 if (S_ISDIR(inode->i_mode)) { 1817 if (S_ISDIR(inode->i_mode)) {
1822 stat->size = ci->i_rbytes; 1818 if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
1819 RBYTES))
1820 stat->size = ci->i_rbytes;
1821 else
1822 stat->size = ci->i_files + ci->i_subdirs;
1823 stat->blocks = 0; 1823 stat->blocks = 0;
1824 stat->blksize = 65536; 1824 stat->blksize = 65536;
1825 } 1825 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1e30d194a8e3..a1ee8fa3a8e7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
693 dout("choose_mds %p %llx.%llx " 693 dout("choose_mds %p %llx.%llx "
694 "frag %u mds%d (%d/%d)\n", 694 "frag %u mds%d (%d/%d)\n",
695 inode, ceph_vinop(inode), 695 inode, ceph_vinop(inode),
696 frag.frag, frag.mds, 696 frag.frag, mds,
697 (int)r, frag.ndist); 697 (int)r, frag.ndist);
698 return mds; 698 if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
699 CEPH_MDS_STATE_ACTIVE)
700 return mds;
699 } 701 }
700 702
701 /* since this file/dir wasn't known to be 703 /* since this file/dir wasn't known to be
@@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
708 dout("choose_mds %p %llx.%llx " 710 dout("choose_mds %p %llx.%llx "
709 "frag %u mds%d (auth)\n", 711 "frag %u mds%d (auth)\n",
710 inode, ceph_vinop(inode), frag.frag, mds); 712 inode, ceph_vinop(inode), frag.frag, mds);
711 return mds; 713 if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
714 CEPH_MDS_STATE_ACTIVE)
715 return mds;
712 } 716 }
713 } 717 }
714 } 718 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bf6f0f34082a..9c5085465a63 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
290 290
291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
293 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
294 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
293 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 295 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
294 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 296 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
295 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 297 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 6e12a6ba5f79..8c9eba6ef9df 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
219 struct rb_node **p; 219 struct rb_node **p;
220 struct rb_node *parent = NULL; 220 struct rb_node *parent = NULL;
221 struct ceph_inode_xattr *xattr = NULL; 221 struct ceph_inode_xattr *xattr = NULL;
222 int name_len = strlen(name);
222 int c; 223 int c;
223 224
224 p = &ci->i_xattrs.index.rb_node; 225 p = &ci->i_xattrs.index.rb_node;
@@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
226 parent = *p; 227 parent = *p;
227 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 228 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
228 c = strncmp(name, xattr->name, xattr->name_len); 229 c = strncmp(name, xattr->name, xattr->name_len);
230 if (c == 0 && name_len > xattr->name_len)
231 c = 1;
229 if (c < 0) 232 if (c < 0)
230 p = &(*p)->rb_left; 233 p = &(*p)->rb_left;
231 else if (c > 0) 234 else if (c > 0)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index ee45648b0d1a..7cb0f7f847e4 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -3,6 +3,7 @@ config CIFS
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO 5 select CRYPTO
6 select CRYPTO_MD4
6 select CRYPTO_MD5 7 select CRYPTO_MD5
7 select CRYPTO_HMAC 8 select CRYPTO_HMAC
8 select CRYPTO_ARC4 9 select CRYPTO_ARC4
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 43b19dd39191..d87558448e3d 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o
10 10
11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o 11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 46af99ab3614..fe1683590828 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -452,6 +452,11 @@ A partial list of the supported mount options follows:
452 if oplock (caching token) is granted and held. Note that 452 if oplock (caching token) is granted and held. Note that
453 direct allows write operations larger than page size 453 direct allows write operations larger than page size
454 to be sent to the server. 454 to be sent to the server.
455 strictcache Use for switching on strict cache mode. In this mode the
456 client read from the cache all the time it has Oplock Level II,
457 otherwise - read from the server. All written data are stored
458 in the cache, but if the client doesn't have Exclusive Oplock,
459 it writes the data to the server.
455 acl Allow setfacl and getfacl to manage posix ACLs if server 460 acl Allow setfacl and getfacl to manage posix ACLs if server
456 supports them. (default) 461 supports them. (default)
457 noacl Do not allow setfacl and getfacl calls on this mount 462 noacl Do not allow setfacl and getfacl calls on this mount
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7ed36536e754..0a265ad9e426 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
282 cFYI(1, "in %s", __func__); 282 cFYI(1, "in %s", __func__);
283 BUG_ON(IS_ROOT(mntpt)); 283 BUG_ON(IS_ROOT(mntpt));
284 284
285 xid = GetXid();
286
287 /* 285 /*
288 * The MSDFS spec states that paths in DFS referral requests and 286 * The MSDFS spec states that paths in DFS referral requests and
289 * responses must be prefixed by a single '\' character instead of 287 * responses must be prefixed by a single '\' character instead of
@@ -293,20 +291,21 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
293 mnt = ERR_PTR(-ENOMEM); 291 mnt = ERR_PTR(-ENOMEM);
294 full_path = build_path_from_dentry(mntpt); 292 full_path = build_path_from_dentry(mntpt);
295 if (full_path == NULL) 293 if (full_path == NULL)
296 goto free_xid; 294 goto cdda_exit;
297 295
298 cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); 296 cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
299 tlink = cifs_sb_tlink(cifs_sb); 297 tlink = cifs_sb_tlink(cifs_sb);
300 mnt = ERR_PTR(-EINVAL);
301 if (IS_ERR(tlink)) { 298 if (IS_ERR(tlink)) {
302 mnt = ERR_CAST(tlink); 299 mnt = ERR_CAST(tlink);
303 goto free_full_path; 300 goto free_full_path;
304 } 301 }
305 ses = tlink_tcon(tlink)->ses; 302 ses = tlink_tcon(tlink)->ses;
306 303
304 xid = GetXid();
307 rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, 305 rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
308 &num_referrals, &referrals, 306 &num_referrals, &referrals,
309 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 307 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
308 FreeXid(xid);
310 309
311 cifs_put_tlink(tlink); 310 cifs_put_tlink(tlink);
312 311
@@ -339,8 +338,7 @@ success:
339 free_dfs_info_array(referrals, num_referrals); 338 free_dfs_info_array(referrals, num_referrals);
340free_full_path: 339free_full_path:
341 kfree(full_path); 340 kfree(full_path);
342free_xid: 341cdda_exit:
343 FreeXid(xid);
344 cFYI(1, "leaving %s" , __func__); 342 cFYI(1, "leaving %s" , __func__);
345 return mnt; 343 return mnt;
346} 344}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1e7636b145a8..beeebf194234 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
372 372
373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), 373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
374 GFP_KERNEL); 374 GFP_KERNEL);
375 if (!ppace) {
376 cERROR(1, "DACL memory allocation error");
377 return;
378 }
375 379
376 for (i = 0; i < num_aces; ++i) { 380 for (i = 0; i < num_aces; ++i) {
377 ppace[i] = (struct cifs_ace *) (acl_base + acl_size); 381 ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 66f3d50d0676..a51585f9852b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -24,7 +24,6 @@
24#include "cifspdu.h" 24#include "cifspdu.h"
25#include "cifsglob.h" 25#include "cifsglob.h"
26#include "cifs_debug.h" 26#include "cifs_debug.h"
27#include "md5.h"
28#include "cifs_unicode.h" 27#include "cifs_unicode.h"
29#include "cifsproto.h" 28#include "cifsproto.h"
30#include "ntlmssp.h" 29#include "ntlmssp.h"
@@ -37,11 +36,6 @@
37/* Note that the smb header signature field on input contains the 36/* Note that the smb header signature field on input contains the
38 sequence number before this function is called */ 37 sequence number before this function is called */
39 38
40extern void mdfour(unsigned char *out, unsigned char *in, int n);
41extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
42extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24);
44
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 39static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 struct TCP_Server_Info *server, char *signature) 40 struct TCP_Server_Info *server, char *signature)
47{ 41{
@@ -234,6 +228,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
234/* first calculate 24 bytes ntlm response and then 16 byte session key */ 228/* first calculate 24 bytes ntlm response and then 16 byte session key */
235int setup_ntlm_response(struct cifsSesInfo *ses) 229int setup_ntlm_response(struct cifsSesInfo *ses)
236{ 230{
231 int rc = 0;
237 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; 232 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
238 char temp_key[CIFS_SESS_KEY_SIZE]; 233 char temp_key[CIFS_SESS_KEY_SIZE];
239 234
@@ -247,13 +242,26 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
247 } 242 }
248 ses->auth_key.len = temp_len; 243 ses->auth_key.len = temp_len;
249 244
250 SMBNTencrypt(ses->password, ses->server->cryptkey, 245 rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
251 ses->auth_key.response + CIFS_SESS_KEY_SIZE); 246 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
247 if (rc) {
248 cFYI(1, "%s Can't generate NTLM response, error: %d",
249 __func__, rc);
250 return rc;
251 }
252 252
253 E_md4hash(ses->password, temp_key); 253 rc = E_md4hash(ses->password, temp_key);
254 mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE); 254 if (rc) {
255 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
256 return rc;
257 }
255 258
256 return 0; 259 rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
260 if (rc)
261 cFYI(1, "%s Can't generate NTLM session key, error: %d",
262 __func__, rc);
263
264 return rc;
257} 265}
258 266
259#ifdef CONFIG_CIFS_WEAK_PW_HASH 267#ifdef CONFIG_CIFS_WEAK_PW_HASH
@@ -649,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses)
649 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); 657 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
650 658
651 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 659 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
652 if (!tfm_arc4 || IS_ERR(tfm_arc4)) { 660 if (IS_ERR(tfm_arc4)) {
661 rc = PTR_ERR(tfm_arc4);
653 cERROR(1, "could not allocate crypto API arc4\n"); 662 cERROR(1, "could not allocate crypto API arc4\n");
654 return PTR_ERR(tfm_arc4); 663 return rc;
655 } 664 }
656 665
657 desc.tfm = tfm_arc4; 666 desc.tfm = tfm_arc4;
@@ -700,14 +709,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
700 unsigned int size; 709 unsigned int size;
701 710
702 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); 711 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
703 if (!server->secmech.hmacmd5 || 712 if (IS_ERR(server->secmech.hmacmd5)) {
704 IS_ERR(server->secmech.hmacmd5)) {
705 cERROR(1, "could not allocate crypto hmacmd5\n"); 713 cERROR(1, "could not allocate crypto hmacmd5\n");
706 return PTR_ERR(server->secmech.hmacmd5); 714 return PTR_ERR(server->secmech.hmacmd5);
707 } 715 }
708 716
709 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); 717 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
710 if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) { 718 if (IS_ERR(server->secmech.md5)) {
711 cERROR(1, "could not allocate crypto md5\n"); 719 cERROR(1, "could not allocate crypto md5\n");
712 rc = PTR_ERR(server->secmech.md5); 720 rc = PTR_ERR(server->secmech.md5);
713 goto crypto_allocate_md5_fail; 721 goto crypto_allocate_md5_fail;
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
deleted file mode 100644
index 15d2ec006474..000000000000
--- a/fs/cifs/cifsencrypt.h
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * fs/cifs/cifsencrypt.h
3 *
4 * Copyright (c) International Business Machines Corp., 2005
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * Externs for misc. small encryption routines
8 * so we do not have to put them in cifsproto.h
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25/* md4.c */
26extern void mdfour(unsigned char *out, unsigned char *in, int n);
27/* smbdes.c */
28extern void E_P16(unsigned char *p14, unsigned char *p16);
29extern void E_P24(unsigned char *p21, const unsigned char *c8,
30 unsigned char *p24);
31
32
33
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a8323f1dc1c4..f2970136d17d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
600{ 600{
601 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 601 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
602 ssize_t written; 602 ssize_t written;
603 int rc;
603 604
604 written = generic_file_aio_write(iocb, iov, nr_segs, pos); 605 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
605 if (!CIFS_I(inode)->clientCanCacheAll) 606
606 filemap_fdatawrite(inode->i_mapping); 607 if (CIFS_I(inode)->clientCanCacheAll)
608 return written;
609
610 rc = filemap_fdatawrite(inode->i_mapping);
611 if (rc)
612 cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode);
613
607 return written; 614 return written;
608} 615}
609 616
@@ -737,7 +744,7 @@ const struct file_operations cifs_file_strict_ops = {
737 .read = do_sync_read, 744 .read = do_sync_read,
738 .write = do_sync_write, 745 .write = do_sync_write,
739 .aio_read = cifs_strict_readv, 746 .aio_read = cifs_strict_readv,
740 .aio_write = cifs_file_aio_write, 747 .aio_write = cifs_strict_writev,
741 .open = cifs_open, 748 .open = cifs_open,
742 .release = cifs_close, 749 .release = cifs_close,
743 .lock = cifs_lock, 750 .lock = cifs_lock,
@@ -793,7 +800,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
793 .read = do_sync_read, 800 .read = do_sync_read,
794 .write = do_sync_write, 801 .write = do_sync_write,
795 .aio_read = cifs_strict_readv, 802 .aio_read = cifs_strict_readv,
796 .aio_write = cifs_file_aio_write, 803 .aio_write = cifs_strict_writev,
797 .open = cifs_open, 804 .open = cifs_open,
798 .release = cifs_close, 805 .release = cifs_close,
799 .fsync = cifs_strict_fsync, 806 .fsync = cifs_strict_fsync,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f23206d46531..4a3330235d55 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -85,7 +85,9 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 86 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
88 size_t write_size, loff_t *poffset); 88 size_t write_size, loff_t *poffset);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos);
89extern int cifs_lock(struct file *, int, struct file_lock *); 91extern int cifs_lock(struct file *, int, struct file_lock *);
90extern int cifs_fsync(struct file *, int); 92extern int cifs_fsync(struct file *, int);
91extern int cifs_strict_fsync(struct file *, int); 93extern int cifs_strict_fsync(struct file *, int);
@@ -125,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125extern const struct export_operations cifs_export_ops; 127extern const struct export_operations cifs_export_ops;
126#endif /* EXPERIMENTAL */ 128#endif /* EXPERIMENTAL */
127 129
128#define CIFS_VERSION "1.69" 130#define CIFS_VERSION "1.70"
129#endif /* _CIFSFS_H */ 131#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5bfb75346cb0..17afb0fbcaed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -166,6 +166,9 @@ struct TCP_Server_Info {
166 struct socket *ssocket; 166 struct socket *ssocket;
167 struct sockaddr_storage dstaddr; 167 struct sockaddr_storage dstaddr;
168 struct sockaddr_storage srcaddr; /* locally bind to this IP */ 168 struct sockaddr_storage srcaddr; /* locally bind to this IP */
169#ifdef CONFIG_NET_NS
170 struct net *net;
171#endif
169 wait_queue_head_t response_q; 172 wait_queue_head_t response_q;
170 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ 173 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
171 struct list_head pending_mid_q; 174 struct list_head pending_mid_q;
@@ -185,6 +188,8 @@ struct TCP_Server_Info {
185 /* multiplexed reads or writes */ 188 /* multiplexed reads or writes */
186 unsigned int maxBuf; /* maxBuf specifies the maximum */ 189 unsigned int maxBuf; /* maxBuf specifies the maximum */
187 /* message size the server can send or receive for non-raw SMBs */ 190 /* message size the server can send or receive for non-raw SMBs */
191 /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
192 /* when socket is setup (and during reconnect) before NegProt sent */
188 unsigned int max_rw; /* maxRw specifies the maximum */ 193 unsigned int max_rw; /* maxRw specifies the maximum */
189 /* message size the server can send or receive for */ 194 /* message size the server can send or receive for */
190 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 195 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -217,6 +222,36 @@ struct TCP_Server_Info {
217}; 222};
218 223
219/* 224/*
225 * Macros to allow the TCP_Server_Info->net field and related code to drop out
226 * when CONFIG_NET_NS isn't set.
227 */
228
229#ifdef CONFIG_NET_NS
230
231static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
232{
233 return srv->net;
234}
235
236static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
237{
238 srv->net = net;
239}
240
241#else
242
243static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
244{
245 return &init_net;
246}
247
248static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
249{
250}
251
252#endif
253
254/*
220 * Session structure. One of these for each uid session with a particular host 255 * Session structure. One of these for each uid session with a particular host
221 */ 256 */
222struct cifsSesInfo { 257struct cifsSesInfo {
@@ -619,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
619#define MID_REQUEST_SUBMITTED 2 654#define MID_REQUEST_SUBMITTED 2
620#define MID_RESPONSE_RECEIVED 4 655#define MID_RESPONSE_RECEIVED 4
621#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 656#define MID_RETRY_NEEDED 8 /* session closed while this request out */
622#define MID_NO_RESP_NEEDED 0x10 657#define MID_RESPONSE_MALFORMED 0x10
623 658
624/* Types of response buffer returned from SendReceive2 */ 659/* Types of response buffer returned from SendReceive2 */
625#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 660#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 982895fa7615..8096f27ad9a8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
85extern bool is_valid_oplock_break(struct smb_hdr *smb, 85extern bool is_valid_oplock_break(struct smb_hdr *smb,
86 struct TCP_Server_Info *); 86 struct TCP_Server_Info *);
87extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 87extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
88extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
89 unsigned int bytes_written);
88extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
89extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
90extern unsigned int smbCalcSize(struct smb_hdr *ptr); 92extern unsigned int smbCalcSize(struct smb_hdr *ptr);
@@ -373,7 +375,7 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
373extern int cifs_verify_signature(struct smb_hdr *, 375extern int cifs_verify_signature(struct smb_hdr *,
374 struct TCP_Server_Info *server, 376 struct TCP_Server_Info *server,
375 __u32 expected_sequence_number); 377 __u32 expected_sequence_number);
376extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); 378extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
377extern int setup_ntlm_response(struct cifsSesInfo *); 379extern int setup_ntlm_response(struct cifsSesInfo *);
378extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); 380extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
379extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); 381extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
@@ -423,4 +425,11 @@ extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
423extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, 425extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
424 const unsigned char *path, 426 const unsigned char *path,
425 struct cifs_sb_info *cifs_sb, int xid); 427 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24);
426#endif /* _CIFSPROTO_H */ 435#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3106f5e5c633..904aa47e3515 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
136 } 136 }
137 } 137 }
138 138
139 if (ses->status == CifsExiting)
140 return -EIO;
141
142 /* 139 /*
143 * Give demultiplex thread up to 10 seconds to reconnect, should be 140 * Give demultiplex thread up to 10 seconds to reconnect, should be
144 * greater than cifs socket timeout which is 7 seconds 141 * greater than cifs socket timeout which is 7 seconds
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
156 * retrying until process is killed or server comes 153 * retrying until process is killed or server comes
157 * back on-line 154 * back on-line
158 */ 155 */
159 if (!tcon->retry || ses->status == CifsExiting) { 156 if (!tcon->retry) {
160 cFYI(1, "gave up waiting on reconnect in smb_init"); 157 cFYI(1, "gave up waiting on reconnect in smb_init");
161 return -EHOSTDOWN; 158 return -EHOSTDOWN;
162 } 159 }
@@ -4914,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4914 __u16 fid, __u32 pid_of_opener, bool SetAllocation) 4911 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4915{ 4912{
4916 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4913 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4917 char *data_offset;
4918 struct file_end_of_file_info *parm_data; 4914 struct file_end_of_file_info *parm_data;
4919 int rc = 0; 4915 int rc = 0;
4920 __u16 params, param_offset, offset, byte_count, count; 4916 __u16 params, param_offset, offset, byte_count, count;
@@ -4938,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4938 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; 4934 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
4939 offset = param_offset + params; 4935 offset = param_offset + params;
4940 4936
4941 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
4942
4943 count = sizeof(struct file_end_of_file_info); 4937 count = sizeof(struct file_end_of_file_info);
4944 pSMB->MaxParameterCount = cpu_to_le16(2); 4938 pSMB->MaxParameterCount = cpu_to_le16(2);
4945 /* BB find exact max SMB PDU from sess structure BB */ 4939 /* BB find exact max SMB PDU from sess structure BB */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 18d3c7724d6e..8d6c17ab593d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -55,9 +55,6 @@
55/* SMB echo "timeout" -- FIXME: tunable? */ 55/* SMB echo "timeout" -- FIXME: tunable? */
56#define SMB_ECHO_INTERVAL (60 * HZ) 56#define SMB_ECHO_INTERVAL (60 * HZ)
57 57
58extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
59 unsigned char *p24);
60
61extern mempool_t *cifs_req_poolp; 58extern mempool_t *cifs_req_poolp;
62 59
63struct smb_vol { 60struct smb_vol {
@@ -87,6 +84,7 @@ struct smb_vol {
87 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ 84 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
88 bool server_ino:1; /* use inode numbers from server ie UniqueId */ 85 bool server_ino:1; /* use inode numbers from server ie UniqueId */
89 bool direct_io:1; 86 bool direct_io:1;
87 bool strict_io:1; /* strict cache behavior */
90 bool remap:1; /* set to remap seven reserved chars in filenames */ 88 bool remap:1; /* set to remap seven reserved chars in filenames */
91 bool posix_paths:1; /* unset to not ask for posix pathnames. */ 89 bool posix_paths:1; /* unset to not ask for posix pathnames. */
92 bool no_linux_ext:1; 90 bool no_linux_ext:1;
@@ -339,8 +337,13 @@ cifs_echo_request(struct work_struct *work)
339 struct TCP_Server_Info *server = container_of(work, 337 struct TCP_Server_Info *server = container_of(work,
340 struct TCP_Server_Info, echo.work); 338 struct TCP_Server_Info, echo.work);
341 339
342 /* no need to ping if we got a response recently */ 340 /*
343 if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) 341 * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
342 * done, which is indicated by maxBuf != 0. Also, no need to ping if
343 * we got a response recently
344 */
345 if (server->maxBuf == 0 ||
346 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
344 goto requeue_echo; 347 goto requeue_echo;
345 348
346 rc = CIFSSMBEcho(server); 349 rc = CIFSSMBEcho(server);
@@ -580,14 +583,23 @@ incomplete_rcv:
580 else if (reconnect == 1) 583 else if (reconnect == 1)
581 continue; 584 continue;
582 585
583 length += 4; /* account for rfc1002 hdr */ 586 total_read += 4; /* account for rfc1002 hdr */
584 587
588 dump_smb(smb_buffer, total_read);
585 589
586 dump_smb(smb_buffer, length); 590 /*
587 if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { 591 * We know that we received enough to get to the MID as we
588 cifs_dump_mem("Bad SMB: ", smb_buffer, 48); 592 * checked the pdu_length earlier. Now check to see
589 continue; 593 * if the rest of the header is OK. We borrow the length
590 } 594 * var for the rest of the loop to avoid a new stack var.
595 *
596 * 48 bytes is enough to display the header and a little bit
597 * into the payload for debugging purposes.
598 */
599 length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
600 if (length != 0)
601 cifs_dump_mem("Bad SMB: ", smb_buffer,
602 min_t(unsigned int, total_read, 48));
591 603
592 mid_entry = NULL; 604 mid_entry = NULL;
593 server->lstrp = jiffies; 605 server->lstrp = jiffies;
@@ -599,7 +611,8 @@ incomplete_rcv:
599 if ((mid_entry->mid == smb_buffer->Mid) && 611 if ((mid_entry->mid == smb_buffer->Mid) &&
600 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 612 (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
601 (mid_entry->command == smb_buffer->Command)) { 613 (mid_entry->command == smb_buffer->Command)) {
602 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 614 if (length == 0 &&
615 check2ndT2(smb_buffer, server->maxBuf) > 0) {
603 /* We have a multipart transact2 resp */ 616 /* We have a multipart transact2 resp */
604 isMultiRsp = true; 617 isMultiRsp = true;
605 if (mid_entry->resp_buf) { 618 if (mid_entry->resp_buf) {
@@ -634,12 +647,17 @@ incomplete_rcv:
634 mid_entry->resp_buf = smb_buffer; 647 mid_entry->resp_buf = smb_buffer;
635 mid_entry->largeBuf = isLargeBuf; 648 mid_entry->largeBuf = isLargeBuf;
636multi_t2_fnd: 649multi_t2_fnd:
637 mid_entry->midState = MID_RESPONSE_RECEIVED; 650 if (length == 0)
638 list_del_init(&mid_entry->qhead); 651 mid_entry->midState =
639 mid_entry->callback(mid_entry); 652 MID_RESPONSE_RECEIVED;
653 else
654 mid_entry->midState =
655 MID_RESPONSE_MALFORMED;
640#ifdef CONFIG_CIFS_STATS2 656#ifdef CONFIG_CIFS_STATS2
641 mid_entry->when_received = jiffies; 657 mid_entry->when_received = jiffies;
642#endif 658#endif
659 list_del_init(&mid_entry->qhead);
660 mid_entry->callback(mid_entry);
643 break; 661 break;
644 } 662 }
645 mid_entry = NULL; 663 mid_entry = NULL;
@@ -655,6 +673,9 @@ multi_t2_fnd:
655 else 673 else
656 smallbuf = NULL; 674 smallbuf = NULL;
657 } 675 }
676 } else if (length != 0) {
677 /* response sanity checks failed */
678 continue;
658 } else if (!is_valid_oplock_break(smb_buffer, server) && 679 } else if (!is_valid_oplock_break(smb_buffer, server) &&
659 !isMultiRsp) { 680 !isMultiRsp) {
660 cERROR(1, "No task to wake, unknown frame received! " 681 cERROR(1, "No task to wake, unknown frame received! "
@@ -1344,6 +1365,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1344 vol->direct_io = 1; 1365 vol->direct_io = 1;
1345 } else if (strnicmp(data, "forcedirectio", 13) == 0) { 1366 } else if (strnicmp(data, "forcedirectio", 13) == 0) {
1346 vol->direct_io = 1; 1367 vol->direct_io = 1;
1368 } else if (strnicmp(data, "strictcache", 11) == 0) {
1369 vol->strict_io = 1;
1347 } else if (strnicmp(data, "noac", 4) == 0) { 1370 } else if (strnicmp(data, "noac", 4) == 0) {
1348 printk(KERN_WARNING "CIFS: Mount option noac not " 1371 printk(KERN_WARNING "CIFS: Mount option noac not "
1349 "supported. Instead set " 1372 "supported. Instead set "
@@ -1568,6 +1591,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1568 1591
1569 spin_lock(&cifs_tcp_ses_lock); 1592 spin_lock(&cifs_tcp_ses_lock);
1570 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { 1593 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
1594 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
1595 continue;
1596
1571 if (!match_address(server, addr, 1597 if (!match_address(server, addr,
1572 (struct sockaddr *)&vol->srcaddr)) 1598 (struct sockaddr *)&vol->srcaddr))
1573 continue; 1599 continue;
@@ -1598,6 +1624,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1598 return; 1624 return;
1599 } 1625 }
1600 1626
1627 put_net(cifs_net_ns(server));
1628
1601 list_del_init(&server->tcp_ses_list); 1629 list_del_init(&server->tcp_ses_list);
1602 spin_unlock(&cifs_tcp_ses_lock); 1630 spin_unlock(&cifs_tcp_ses_lock);
1603 1631
@@ -1672,6 +1700,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1672 goto out_err; 1700 goto out_err;
1673 } 1701 }
1674 1702
1703 cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
1675 tcp_ses->hostname = extract_hostname(volume_info->UNC); 1704 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1676 if (IS_ERR(tcp_ses->hostname)) { 1705 if (IS_ERR(tcp_ses->hostname)) {
1677 rc = PTR_ERR(tcp_ses->hostname); 1706 rc = PTR_ERR(tcp_ses->hostname);
@@ -1752,6 +1781,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1752out_err_crypto_release: 1781out_err_crypto_release:
1753 cifs_crypto_shash_release(tcp_ses); 1782 cifs_crypto_shash_release(tcp_ses);
1754 1783
1784 put_net(cifs_net_ns(tcp_ses));
1785
1755out_err: 1786out_err:
1756 if (tcp_ses) { 1787 if (tcp_ses) {
1757 if (!IS_ERR(tcp_ses->hostname)) 1788 if (!IS_ERR(tcp_ses->hostname))
@@ -2263,8 +2294,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
2263 } 2294 }
2264 2295
2265 if (socket == NULL) { 2296 if (socket == NULL) {
2266 rc = sock_create_kern(sfamily, SOCK_STREAM, 2297 rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
2267 IPPROTO_TCP, &socket); 2298 IPPROTO_TCP, &socket, 1);
2268 if (rc < 0) { 2299 if (rc < 0) {
2269 cERROR(1, "Error %d creating socket", rc); 2300 cERROR(1, "Error %d creating socket", rc);
2270 server->ssocket = NULL; 2301 server->ssocket = NULL;
@@ -2576,6 +2607,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2576 if (pvolume_info->multiuser) 2607 if (pvolume_info->multiuser)
2577 cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | 2608 cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
2578 CIFS_MOUNT_NO_PERM); 2609 CIFS_MOUNT_NO_PERM);
2610 if (pvolume_info->strict_io)
2611 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
2579 if (pvolume_info->direct_io) { 2612 if (pvolume_info->direct_io) {
2580 cFYI(1, "mounting share using direct i/o"); 2613 cFYI(1, "mounting share using direct i/o");
2581 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2614 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
@@ -2977,7 +3010,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2977 bcc_ptr); 3010 bcc_ptr);
2978 else 3011 else
2979#endif /* CIFS_WEAK_PW_HASH */ 3012#endif /* CIFS_WEAK_PW_HASH */
2980 SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr); 3013 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
3014 bcc_ptr);
2981 3015
2982 bcc_ptr += CIFS_AUTH_RESP_SIZE; 3016 bcc_ptr += CIFS_AUTH_RESP_SIZE;
2983 if (ses->capabilities & CAP_UNICODE) { 3017 if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d7d65a70678e..e964b1cd5dd0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -346,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file)
346 struct cifsTconInfo *tcon; 346 struct cifsTconInfo *tcon;
347 struct tcon_link *tlink; 347 struct tcon_link *tlink;
348 struct cifsFileInfo *pCifsFile = NULL; 348 struct cifsFileInfo *pCifsFile = NULL;
349 struct cifsInodeInfo *pCifsInode;
350 char *full_path = NULL; 349 char *full_path = NULL;
351 bool posix_open_ok = false; 350 bool posix_open_ok = false;
352 __u16 netfid; 351 __u16 netfid;
@@ -361,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file)
361 } 360 }
362 tcon = tlink_tcon(tlink); 361 tcon = tlink_tcon(tlink);
363 362
364 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
365
366 full_path = build_path_from_dentry(file->f_path.dentry); 363 full_path = build_path_from_dentry(file->f_path.dentry);
367 if (full_path == NULL) { 364 if (full_path == NULL) {
368 rc = -ENOMEM; 365 rc = -ENOMEM;
@@ -848,7 +845,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
848} 845}
849 846
850/* update the file size (if needed) after a write */ 847/* update the file size (if needed) after a write */
851static void 848void
852cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 849cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
853 unsigned int bytes_written) 850 unsigned int bytes_written)
854{ 851{
@@ -1146,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1146 char *write_data; 1143 char *write_data;
1147 int rc = -EFAULT; 1144 int rc = -EFAULT;
1148 int bytes_written = 0; 1145 int bytes_written = 0;
1149 struct cifs_sb_info *cifs_sb;
1150 struct inode *inode; 1146 struct inode *inode;
1151 struct cifsFileInfo *open_file; 1147 struct cifsFileInfo *open_file;
1152 1148
@@ -1154,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1154 return -EFAULT; 1150 return -EFAULT;
1155 1151
1156 inode = page->mapping->host; 1152 inode = page->mapping->host;
1157 cifs_sb = CIFS_SB(inode->i_sb);
1158 1153
1159 offset += (loff_t)from; 1154 offset += (loff_t)from;
1160 write_data = kmap(page); 1155 write_data = kmap(page);
@@ -1619,13 +1614,215 @@ int cifs_flush(struct file *file, fl_owner_t id)
1619 return rc; 1614 return rc;
1620} 1615}
1621 1616
1617static int
1618cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1619{
1620 int rc = 0;
1621 unsigned long i;
1622
1623 for (i = 0; i < num_pages; i++) {
1624 pages[i] = alloc_page(__GFP_HIGHMEM);
1625 if (!pages[i]) {
1626 /*
1627 * save number of pages we have already allocated and
1628 * return with ENOMEM error
1629 */
1630 num_pages = i;
1631 rc = -ENOMEM;
1632 goto error;
1633 }
1634 }
1635
1636 return rc;
1637
1638error:
1639 for (i = 0; i < num_pages; i++)
1640 put_page(pages[i]);
1641 return rc;
1642}
1643
1644static inline
1645size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1646{
1647 size_t num_pages;
1648 size_t clen;
1649
1650 clen = min_t(const size_t, len, wsize);
1651 num_pages = clen / PAGE_CACHE_SIZE;
1652 if (clen % PAGE_CACHE_SIZE)
1653 num_pages++;
1654
1655 if (cur_len)
1656 *cur_len = clen;
1657
1658 return num_pages;
1659}
1660
1661static ssize_t
1662cifs_iovec_write(struct file *file, const struct iovec *iov,
1663 unsigned long nr_segs, loff_t *poffset)
1664{
1665 unsigned int written;
1666 unsigned long num_pages, npages, i;
1667 size_t copied, len, cur_len;
1668 ssize_t total_written = 0;
1669 struct kvec *to_send;
1670 struct page **pages;
1671 struct iov_iter it;
1672 struct inode *inode;
1673 struct cifsFileInfo *open_file;
1674 struct cifsTconInfo *pTcon;
1675 struct cifs_sb_info *cifs_sb;
1676 int xid, rc;
1677
1678 len = iov_length(iov, nr_segs);
1679 if (!len)
1680 return 0;
1681
1682 rc = generic_write_checks(file, poffset, &len, 0);
1683 if (rc)
1684 return rc;
1685
1686 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1687 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
1688
1689 pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
1690 if (!pages)
1691 return -ENOMEM;
1692
1693 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
1694 if (!to_send) {
1695 kfree(pages);
1696 return -ENOMEM;
1697 }
1698
1699 rc = cifs_write_allocate_pages(pages, num_pages);
1700 if (rc) {
1701 kfree(pages);
1702 kfree(to_send);
1703 return rc;
1704 }
1705
1706 xid = GetXid();
1707 open_file = file->private_data;
1708 pTcon = tlink_tcon(open_file->tlink);
1709 inode = file->f_path.dentry->d_inode;
1710
1711 iov_iter_init(&it, iov, nr_segs, len, 0);
1712 npages = num_pages;
1713
1714 do {
1715 size_t save_len = cur_len;
1716 for (i = 0; i < npages; i++) {
1717 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
1718 copied = iov_iter_copy_from_user(pages[i], &it, 0,
1719 copied);
1720 cur_len -= copied;
1721 iov_iter_advance(&it, copied);
1722 to_send[i+1].iov_base = kmap(pages[i]);
1723 to_send[i+1].iov_len = copied;
1724 }
1725
1726 cur_len = save_len - cur_len;
1727
1728 do {
1729 if (open_file->invalidHandle) {
1730 rc = cifs_reopen_file(open_file, false);
1731 if (rc != 0)
1732 break;
1733 }
1734 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
1735 cur_len, *poffset, &written,
1736 to_send, npages, 0);
1737 } while (rc == -EAGAIN);
1738
1739 for (i = 0; i < npages; i++)
1740 kunmap(pages[i]);
1741
1742 if (written) {
1743 len -= written;
1744 total_written += written;
1745 cifs_update_eof(CIFS_I(inode), *poffset, written);
1746 *poffset += written;
1747 } else if (rc < 0) {
1748 if (!total_written)
1749 total_written = rc;
1750 break;
1751 }
1752
1753 /* get length and number of kvecs of the next write */
1754 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
1755 } while (len > 0);
1756
1757 if (total_written > 0) {
1758 spin_lock(&inode->i_lock);
1759 if (*poffset > inode->i_size)
1760 i_size_write(inode, *poffset);
1761 spin_unlock(&inode->i_lock);
1762 }
1763
1764 cifs_stats_bytes_written(pTcon, total_written);
1765 mark_inode_dirty_sync(inode);
1766
1767 for (i = 0; i < num_pages; i++)
1768 put_page(pages[i]);
1769 kfree(to_send);
1770 kfree(pages);
1771 FreeXid(xid);
1772 return total_written;
1773}
1774
1775static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1776 unsigned long nr_segs, loff_t pos)
1777{
1778 ssize_t written;
1779 struct inode *inode;
1780
1781 inode = iocb->ki_filp->f_path.dentry->d_inode;
1782
1783 /*
1784 * BB - optimize the way when signing is disabled. We can drop this
1785 * extra memory-to-memory copying and use iovec buffers for constructing
1786 * write request.
1787 */
1788
1789 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
1790 if (written > 0) {
1791 CIFS_I(inode)->invalid_mapping = true;
1792 iocb->ki_pos = pos;
1793 }
1794
1795 return written;
1796}
1797
1798ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
1799 unsigned long nr_segs, loff_t pos)
1800{
1801 struct inode *inode;
1802
1803 inode = iocb->ki_filp->f_path.dentry->d_inode;
1804
1805 if (CIFS_I(inode)->clientCanCacheAll)
1806 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1807
1808 /*
1809 * In strict cache mode we need to write the data to the server exactly
1810 * from the pos to pos+len-1 rather than flush all affected pages
1811 * because it may cause a error with mandatory locks on these pages but
1812 * not on the region from pos to ppos+len-1.
1813 */
1814
1815 return cifs_user_writev(iocb, iov, nr_segs, pos);
1816}
1817
1622static ssize_t 1818static ssize_t
1623cifs_iovec_read(struct file *file, const struct iovec *iov, 1819cifs_iovec_read(struct file *file, const struct iovec *iov,
1624 unsigned long nr_segs, loff_t *poffset) 1820 unsigned long nr_segs, loff_t *poffset)
1625{ 1821{
1626 int rc; 1822 int rc;
1627 int xid; 1823 int xid;
1628 unsigned int total_read, bytes_read = 0; 1824 ssize_t total_read;
1825 unsigned int bytes_read = 0;
1629 size_t len, cur_len; 1826 size_t len, cur_len;
1630 int iov_offset = 0; 1827 int iov_offset = 0;
1631 struct cifs_sb_info *cifs_sb; 1828 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 306769de2fb5..e8804d373404 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -28,7 +28,6 @@
28#include "cifsproto.h" 28#include "cifsproto.h"
29#include "cifs_debug.h" 29#include "cifs_debug.h"
30#include "cifs_fs_sb.h" 30#include "cifs_fs_sb.h"
31#include "md5.h"
32 31
33#define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) 32#define CIFS_MF_SYMLINK_LEN_OFFSET (4+1)
34#define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) 33#define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1))
@@ -47,6 +46,45 @@
47 md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] 46 md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
48 47
49static int 48static int
49symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
50{
51 int rc;
52 unsigned int size;
53 struct crypto_shash *md5;
54 struct sdesc *sdescmd5;
55
56 md5 = crypto_alloc_shash("md5", 0, 0);
57 if (IS_ERR(md5)) {
58 rc = PTR_ERR(md5);
59 cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc);
60 return rc;
61 }
62 size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
63 sdescmd5 = kmalloc(size, GFP_KERNEL);
64 if (!sdescmd5) {
65 rc = -ENOMEM;
66 cERROR(1, "%s: Memory allocation failure\n", __func__);
67 goto symlink_hash_err;
68 }
69 sdescmd5->shash.tfm = md5;
70 sdescmd5->shash.flags = 0x0;
71
72 rc = crypto_shash_init(&sdescmd5->shash);
73 if (rc) {
74 cERROR(1, "%s: Could not init md5 shash\n", __func__);
75 goto symlink_hash_err;
76 }
77 crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
79
80symlink_hash_err:
81 crypto_free_shash(md5);
82 kfree(sdescmd5);
83
84 return rc;
85}
86
87static int
50CIFSParseMFSymlink(const u8 *buf, 88CIFSParseMFSymlink(const u8 *buf,
51 unsigned int buf_len, 89 unsigned int buf_len,
52 unsigned int *_link_len, 90 unsigned int *_link_len,
@@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf,
56 unsigned int link_len; 94 unsigned int link_len;
57 const char *md5_str1; 95 const char *md5_str1;
58 const char *link_str; 96 const char *link_str;
59 struct MD5Context md5_ctx;
60 u8 md5_hash[16]; 97 u8 md5_hash[16];
61 char md5_str2[34]; 98 char md5_str2[34];
62 99
@@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf,
70 if (rc != 1) 107 if (rc != 1)
71 return -EINVAL; 108 return -EINVAL;
72 109
73 cifs_MD5_init(&md5_ctx); 110 rc = symlink_hash(link_len, link_str, md5_hash);
74 cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); 111 if (rc) {
75 cifs_MD5_final(md5_hash, &md5_ctx); 112 cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
113 return rc;
114 }
76 115
77 snprintf(md5_str2, sizeof(md5_str2), 116 snprintf(md5_str2, sizeof(md5_str2),
78 CIFS_MF_SYMLINK_MD5_FORMAT, 117 CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf,
94static int 133static int
95CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) 134CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
96{ 135{
136 int rc;
97 unsigned int link_len; 137 unsigned int link_len;
98 unsigned int ofs; 138 unsigned int ofs;
99 struct MD5Context md5_ctx;
100 u8 md5_hash[16]; 139 u8 md5_hash[16];
101 140
102 if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) 141 if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE)
@@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
107 if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) 146 if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
108 return -ENAMETOOLONG; 147 return -ENAMETOOLONG;
109 148
110 cifs_MD5_init(&md5_ctx); 149 rc = symlink_hash(link_len, link_str, md5_hash);
111 cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); 150 if (rc) {
112 cifs_MD5_final(md5_hash, &md5_ctx); 151 cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
152 return rc;
153 }
113 154
114 snprintf(buf, buf_len, 155 snprintf(buf, buf_len,
115 CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, 156 CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
deleted file mode 100644
index a725c2609d67..000000000000
--- a/fs/cifs/md4.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4 an implementation of MD4 designed for use in the SMB authentication protocol
5 Copyright (C) Andrew Tridgell 1997-1998.
6 Modified by Steve French (sfrench@us.ibm.com) 2002-2003
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21*/
22#include <linux/module.h>
23#include <linux/fs.h>
24#include "cifsencrypt.h"
25
26/* NOTE: This code makes no attempt to be fast! */
27
28static __u32
29F(__u32 X, __u32 Y, __u32 Z)
30{
31 return (X & Y) | ((~X) & Z);
32}
33
34static __u32
35G(__u32 X, __u32 Y, __u32 Z)
36{
37 return (X & Y) | (X & Z) | (Y & Z);
38}
39
40static __u32
41H(__u32 X, __u32 Y, __u32 Z)
42{
43 return X ^ Y ^ Z;
44}
45
46static __u32
47lshift(__u32 x, int s)
48{
49 x &= 0xFFFFFFFF;
50 return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
51}
52
53#define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s)
54#define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s)
55#define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s)
56
57/* this applies md4 to 64 byte chunks */
58static void
59mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
60{
61 int j;
62 __u32 AA, BB, CC, DD;
63 __u32 X[16];
64
65
66 for (j = 0; j < 16; j++)
67 X[j] = M[j];
68
69 AA = *A;
70 BB = *B;
71 CC = *C;
72 DD = *D;
73
74 ROUND1(A, B, C, D, 0, 3);
75 ROUND1(D, A, B, C, 1, 7);
76 ROUND1(C, D, A, B, 2, 11);
77 ROUND1(B, C, D, A, 3, 19);
78 ROUND1(A, B, C, D, 4, 3);
79 ROUND1(D, A, B, C, 5, 7);
80 ROUND1(C, D, A, B, 6, 11);
81 ROUND1(B, C, D, A, 7, 19);
82 ROUND1(A, B, C, D, 8, 3);
83 ROUND1(D, A, B, C, 9, 7);
84 ROUND1(C, D, A, B, 10, 11);
85 ROUND1(B, C, D, A, 11, 19);
86 ROUND1(A, B, C, D, 12, 3);
87 ROUND1(D, A, B, C, 13, 7);
88 ROUND1(C, D, A, B, 14, 11);
89 ROUND1(B, C, D, A, 15, 19);
90
91 ROUND2(A, B, C, D, 0, 3);
92 ROUND2(D, A, B, C, 4, 5);
93 ROUND2(C, D, A, B, 8, 9);
94 ROUND2(B, C, D, A, 12, 13);
95 ROUND2(A, B, C, D, 1, 3);
96 ROUND2(D, A, B, C, 5, 5);
97 ROUND2(C, D, A, B, 9, 9);
98 ROUND2(B, C, D, A, 13, 13);
99 ROUND2(A, B, C, D, 2, 3);
100 ROUND2(D, A, B, C, 6, 5);
101 ROUND2(C, D, A, B, 10, 9);
102 ROUND2(B, C, D, A, 14, 13);
103 ROUND2(A, B, C, D, 3, 3);
104 ROUND2(D, A, B, C, 7, 5);
105 ROUND2(C, D, A, B, 11, 9);
106 ROUND2(B, C, D, A, 15, 13);
107
108 ROUND3(A, B, C, D, 0, 3);
109 ROUND3(D, A, B, C, 8, 9);
110 ROUND3(C, D, A, B, 4, 11);
111 ROUND3(B, C, D, A, 12, 15);
112 ROUND3(A, B, C, D, 2, 3);
113 ROUND3(D, A, B, C, 10, 9);
114 ROUND3(C, D, A, B, 6, 11);
115 ROUND3(B, C, D, A, 14, 15);
116 ROUND3(A, B, C, D, 1, 3);
117 ROUND3(D, A, B, C, 9, 9);
118 ROUND3(C, D, A, B, 5, 11);
119 ROUND3(B, C, D, A, 13, 15);
120 ROUND3(A, B, C, D, 3, 3);
121 ROUND3(D, A, B, C, 11, 9);
122 ROUND3(C, D, A, B, 7, 11);
123 ROUND3(B, C, D, A, 15, 15);
124
125 *A += AA;
126 *B += BB;
127 *C += CC;
128 *D += DD;
129
130 *A &= 0xFFFFFFFF;
131 *B &= 0xFFFFFFFF;
132 *C &= 0xFFFFFFFF;
133 *D &= 0xFFFFFFFF;
134
135 for (j = 0; j < 16; j++)
136 X[j] = 0;
137}
138
139static void
140copy64(__u32 *M, unsigned char *in)
141{
142 int i;
143
144 for (i = 0; i < 16; i++)
145 M[i] = (in[i * 4 + 3] << 24) | (in[i * 4 + 2] << 16) |
146 (in[i * 4 + 1] << 8) | (in[i * 4 + 0] << 0);
147}
148
149static void
150copy4(unsigned char *out, __u32 x)
151{
152 out[0] = x & 0xFF;
153 out[1] = (x >> 8) & 0xFF;
154 out[2] = (x >> 16) & 0xFF;
155 out[3] = (x >> 24) & 0xFF;
156}
157
158/* produce a md4 message digest from data of length n bytes */
159void
160mdfour(unsigned char *out, unsigned char *in, int n)
161{
162 unsigned char buf[128];
163 __u32 M[16];
164 __u32 b = n * 8;
165 int i;
166 __u32 A = 0x67452301;
167 __u32 B = 0xefcdab89;
168 __u32 C = 0x98badcfe;
169 __u32 D = 0x10325476;
170
171 while (n > 64) {
172 copy64(M, in);
173 mdfour64(M, &A, &B, &C, &D);
174 in += 64;
175 n -= 64;
176 }
177
178 for (i = 0; i < 128; i++)
179 buf[i] = 0;
180 memcpy(buf, in, n);
181 buf[n] = 0x80;
182
183 if (n <= 55) {
184 copy4(buf + 56, b);
185 copy64(M, buf);
186 mdfour64(M, &A, &B, &C, &D);
187 } else {
188 copy4(buf + 120, b);
189 copy64(M, buf);
190 mdfour64(M, &A, &B, &C, &D);
191 copy64(M, buf + 64);
192 mdfour64(M, &A, &B, &C, &D);
193 }
194
195 for (i = 0; i < 128; i++)
196 buf[i] = 0;
197 copy64(M, buf);
198
199 copy4(out, A);
200 copy4(out + 4, B);
201 copy4(out + 8, C);
202 copy4(out + 12, D);
203
204 A = B = C = D = 0;
205}
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
deleted file mode 100644
index 98b66a54c319..000000000000
--- a/fs/cifs/md5.c
+++ /dev/null
@@ -1,366 +0,0 @@
1/*
2 * This code implements the MD5 message-digest algorithm.
3 * The algorithm is due to Ron Rivest. This code was
4 * written by Colin Plumb in 1993, no copyright is claimed.
5 * This code is in the public domain; do with it what you wish.
6 *
7 * Equivalent code is available from RSA Data Security, Inc.
8 * This code has been tested against that, and is equivalent,
9 * except that you don't need to include two pages of legalese
10 * with every copy.
11 *
12 * To compute the message digest of a chunk of bytes, declare an
13 * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as
14 * needed on buffers full of bytes, and then call cifs_MD5_final, which
15 * will fill a supplied 16-byte array with the digest.
16 */
17
18/* This code slightly modified to fit into Samba by
19 abartlet@samba.org Jun 2001
20 and to fit the cifs vfs by
21 Steve French sfrench@us.ibm.com */
22
23#include <linux/string.h>
24#include "md5.h"
25
26static void MD5Transform(__u32 buf[4], __u32 const in[16]);
27
28/*
29 * Note: this code is harmless on little-endian machines.
30 */
31static void
32byteReverse(unsigned char *buf, unsigned longs)
33{
34 __u32 t;
35 do {
36 t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
37 ((unsigned) buf[1] << 8 | buf[0]);
38 *(__u32 *) buf = t;
39 buf += 4;
40 } while (--longs);
41}
42
43/*
44 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
45 * initialization constants.
46 */
47void
48cifs_MD5_init(struct MD5Context *ctx)
49{
50 ctx->buf[0] = 0x67452301;
51 ctx->buf[1] = 0xefcdab89;
52 ctx->buf[2] = 0x98badcfe;
53 ctx->buf[3] = 0x10325476;
54
55 ctx->bits[0] = 0;
56 ctx->bits[1] = 0;
57}
58
59/*
60 * Update context to reflect the concatenation of another buffer full
61 * of bytes.
62 */
63void
64cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
65{
66 register __u32 t;
67
68 /* Update bitcount */
69
70 t = ctx->bits[0];
71 if ((ctx->bits[0] = t + ((__u32) len << 3)) < t)
72 ctx->bits[1]++; /* Carry from low to high */
73 ctx->bits[1] += len >> 29;
74
75 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
76
77 /* Handle any leading odd-sized chunks */
78
79 if (t) {
80 unsigned char *p = (unsigned char *) ctx->in + t;
81
82 t = 64 - t;
83 if (len < t) {
84 memmove(p, buf, len);
85 return;
86 }
87 memmove(p, buf, t);
88 byteReverse(ctx->in, 16);
89 MD5Transform(ctx->buf, (__u32 *) ctx->in);
90 buf += t;
91 len -= t;
92 }
93 /* Process data in 64-byte chunks */
94
95 while (len >= 64) {
96 memmove(ctx->in, buf, 64);
97 byteReverse(ctx->in, 16);
98 MD5Transform(ctx->buf, (__u32 *) ctx->in);
99 buf += 64;
100 len -= 64;
101 }
102
103 /* Handle any remaining bytes of data. */
104
105 memmove(ctx->in, buf, len);
106}
107
108/*
109 * Final wrapup - pad to 64-byte boundary with the bit pattern
110 * 1 0* (64-bit count of bits processed, MSB-first)
111 */
112void
113cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx)
114{
115 unsigned int count;
116 unsigned char *p;
117
118 /* Compute number of bytes mod 64 */
119 count = (ctx->bits[0] >> 3) & 0x3F;
120
121 /* Set the first char of padding to 0x80. This is safe since there is
122 always at least one byte free */
123 p = ctx->in + count;
124 *p++ = 0x80;
125
126 /* Bytes of padding needed to make 64 bytes */
127 count = 64 - 1 - count;
128
129 /* Pad out to 56 mod 64 */
130 if (count < 8) {
131 /* Two lots of padding: Pad the first block to 64 bytes */
132 memset(p, 0, count);
133 byteReverse(ctx->in, 16);
134 MD5Transform(ctx->buf, (__u32 *) ctx->in);
135
136 /* Now fill the next block with 56 bytes */
137 memset(ctx->in, 0, 56);
138 } else {
139 /* Pad block to 56 bytes */
140 memset(p, 0, count - 8);
141 }
142 byteReverse(ctx->in, 14);
143
144 /* Append length in bits and transform */
145 ((__u32 *) ctx->in)[14] = ctx->bits[0];
146 ((__u32 *) ctx->in)[15] = ctx->bits[1];
147
148 MD5Transform(ctx->buf, (__u32 *) ctx->in);
149 byteReverse((unsigned char *) ctx->buf, 4);
150 memmove(digest, ctx->buf, 16);
151 memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
152}
153
154/* The four core functions - F1 is optimized somewhat */
155
156/* #define F1(x, y, z) (x & y | ~x & z) */
157#define F1(x, y, z) (z ^ (x & (y ^ z)))
158#define F2(x, y, z) F1(z, x, y)
159#define F3(x, y, z) (x ^ y ^ z)
160#define F4(x, y, z) (y ^ (x | ~z))
161
162/* This is the central step in the MD5 algorithm. */
163#define MD5STEP(f, w, x, y, z, data, s) \
164 (w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x)
165
166/*
167 * The core of the MD5 algorithm, this alters an existing MD5 hash to
168 * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks
169 * the data and converts bytes into longwords for this routine.
170 */
171static void
172MD5Transform(__u32 buf[4], __u32 const in[16])
173{
174 register __u32 a, b, c, d;
175
176 a = buf[0];
177 b = buf[1];
178 c = buf[2];
179 d = buf[3];
180
181 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
182 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
183 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
184 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
185 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
186 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
187 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
188 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
189 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
190 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
191 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
192 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
193 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
194 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
195 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
196 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
197
198 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
199 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
200 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
201 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
202 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
203 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
204 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
205 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
206 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
207 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
208 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
209 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
210 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
211 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
212 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
213 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
214
215 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
216 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
217 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
218 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
219 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
220 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
221 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
222 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
223 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
224 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
225 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
226 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
227 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
228 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
229 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
230 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
231
232 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
233 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
234 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
235 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
236 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
237 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
238 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
239 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
240 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
241 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
242 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
243 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
244 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
245 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
246 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
247 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
248
249 buf[0] += a;
250 buf[1] += b;
251 buf[2] += c;
252 buf[3] += d;
253}
254
255#if 0 /* currently unused */
256/***********************************************************************
257 the rfc 2104 version of hmac_md5 initialisation.
258***********************************************************************/
259static void
260hmac_md5_init_rfc2104(unsigned char *key, int key_len,
261 struct HMACMD5Context *ctx)
262{
263 int i;
264
265 /* if key is longer than 64 bytes reset it to key=MD5(key) */
266 if (key_len > 64) {
267 unsigned char tk[16];
268 struct MD5Context tctx;
269
270 cifs_MD5_init(&tctx);
271 cifs_MD5_update(&tctx, key, key_len);
272 cifs_MD5_final(tk, &tctx);
273
274 key = tk;
275 key_len = 16;
276 }
277
278 /* start out by storing key in pads */
279 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
280 memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
281 memcpy(ctx->k_ipad, key, key_len);
282 memcpy(ctx->k_opad, key, key_len);
283
284 /* XOR key with ipad and opad values */
285 for (i = 0; i < 64; i++) {
286 ctx->k_ipad[i] ^= 0x36;
287 ctx->k_opad[i] ^= 0x5c;
288 }
289
290 cifs_MD5_init(&ctx->ctx);
291 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
292}
293#endif
294
295/***********************************************************************
296 the microsoft version of hmac_md5 initialisation.
297***********************************************************************/
298void
299hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
300 struct HMACMD5Context *ctx)
301{
302 int i;
303
304 /* if key is longer than 64 bytes truncate it */
305 if (key_len > 64)
306 key_len = 64;
307
308 /* start out by storing key in pads */
309 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
310 memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
311 memcpy(ctx->k_ipad, key, key_len);
312 memcpy(ctx->k_opad, key, key_len);
313
314 /* XOR key with ipad and opad values */
315 for (i = 0; i < 64; i++) {
316 ctx->k_ipad[i] ^= 0x36;
317 ctx->k_opad[i] ^= 0x5c;
318 }
319
320 cifs_MD5_init(&ctx->ctx);
321 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
322}
323
324/***********************************************************************
325 update hmac_md5 "inner" buffer
326***********************************************************************/
327void
328hmac_md5_update(const unsigned char *text, int text_len,
329 struct HMACMD5Context *ctx)
330{
331 cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */
332}
333
334/***********************************************************************
335 finish off hmac_md5 "inner" buffer and generate outer one.
336***********************************************************************/
337void
338hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
339{
340 struct MD5Context ctx_o;
341
342 cifs_MD5_final(digest, &ctx->ctx);
343
344 cifs_MD5_init(&ctx_o);
345 cifs_MD5_update(&ctx_o, ctx->k_opad, 64);
346 cifs_MD5_update(&ctx_o, digest, 16);
347 cifs_MD5_final(digest, &ctx_o);
348}
349
350/***********************************************************
351 single function to calculate an HMAC MD5 digest from data.
352 use the microsoft hmacmd5 init method because the key is 16 bytes.
353************************************************************/
354#if 0 /* currently unused */
355static void
356hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
357 unsigned char *digest)
358{
359 struct HMACMD5Context ctx;
360 hmac_md5_init_limK_to_64(key, 16, &ctx);
361 if (data_len != 0)
362 hmac_md5_update(data, data_len, &ctx);
363
364 hmac_md5_final(digest, &ctx);
365}
366#endif
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
deleted file mode 100644
index 6fba8cb402fd..000000000000
--- a/fs/cifs/md5.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#ifndef MD5_H
2#define MD5_H
3#ifndef HEADER_MD5_H
4/* Try to avoid clashes with OpenSSL */
5#define HEADER_MD5_H
6#endif
7
8struct MD5Context {
9 __u32 buf[4];
10 __u32 bits[2];
11 unsigned char in[64];
12};
13#endif /* !MD5_H */
14
15#ifndef _HMAC_MD5_H
16struct HMACMD5Context {
17 struct MD5Context ctx;
18 unsigned char k_ipad[65];
19 unsigned char k_opad[65];
20};
21#endif /* _HMAC_MD5_H */
22
23void cifs_MD5_init(struct MD5Context *context);
24void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf,
25 unsigned len);
26void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context);
27
28/* The following definitions come from lib/hmacmd5.c */
29
30/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
31 struct HMACMD5Context *ctx);*/
32void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
33 struct HMACMD5Context *ctx);
34void hmac_md5_update(const unsigned char *text, int text_len,
35 struct HMACMD5Context *ctx);
36void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx);
37/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
38 unsigned char *digest);*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a09e077ba925..2a930a752a78 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
236{ 236{
237 __u16 mid = 0; 237 __u16 mid = 0;
238 __u16 last_mid; 238 __u16 last_mid;
239 int collision; 239 bool collision;
240
241 if (server == NULL)
242 return mid;
243 240
244 spin_lock(&GlobalMid_Lock); 241 spin_lock(&GlobalMid_Lock);
245 last_mid = server->CurrentMid; /* we do not want to loop forever */ 242 last_mid = server->CurrentMid; /* we do not want to loop forever */
@@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
252 (and it would also have to have been a request that 249 (and it would also have to have been a request that
253 did not time out) */ 250 did not time out) */
254 while (server->CurrentMid != last_mid) { 251 while (server->CurrentMid != last_mid) {
255 struct list_head *tmp;
256 struct mid_q_entry *mid_entry; 252 struct mid_q_entry *mid_entry;
253 unsigned int num_mids;
257 254
258 collision = 0; 255 collision = false;
259 if (server->CurrentMid == 0) 256 if (server->CurrentMid == 0)
260 server->CurrentMid++; 257 server->CurrentMid++;
261 258
262 list_for_each(tmp, &server->pending_mid_q) { 259 num_mids = 0;
263 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 260 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
264 261 ++num_mids;
265 if ((mid_entry->mid == server->CurrentMid) && 262 if (mid_entry->mid == server->CurrentMid &&
266 (mid_entry->midState == MID_REQUEST_SUBMITTED)) { 263 mid_entry->midState == MID_REQUEST_SUBMITTED) {
267 /* This mid is in use, try a different one */ 264 /* This mid is in use, try a different one */
268 collision = 1; 265 collision = true;
269 break; 266 break;
270 } 267 }
271 } 268 }
272 if (collision == 0) { 269
270 /*
271 * if we have more than 32k mids in the list, then something
272 * is very wrong. Possibly a local user is trying to DoS the
273 * box by issuing long-running calls and SIGKILL'ing them. If
274 * we get to 2^16 mids then we're in big trouble as this
275 * function could loop forever.
276 *
277 * Go ahead and assign out the mid in this situation, but force
278 * an eventual reconnect to clean out the pending_mid_q.
279 */
280 if (num_mids > 32768)
281 server->tcpStatus = CifsNeedReconnect;
282
283 if (!collision) {
273 mid = server->CurrentMid; 284 mid = server->CurrentMid;
274 break; 285 break;
275 } 286 }
@@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
381} 392}
382 393
383static int 394static int
384checkSMBhdr(struct smb_hdr *smb, __u16 mid) 395check_smb_hdr(struct smb_hdr *smb, __u16 mid)
385{ 396{
386 /* Make sure that this really is an SMB, that it is a response, 397 /* does it have the right SMB "signature" ? */
387 and that the message ids match */ 398 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
388 if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && 399 cERROR(1, "Bad protocol string signature header 0x%x",
389 (mid == smb->Mid)) { 400 *(unsigned int *)smb->Protocol);
390 if (smb->Flags & SMBFLG_RESPONSE) 401 return 1;
391 return 0; 402 }
392 else { 403
393 /* only one valid case where server sends us request */ 404 /* Make sure that message ids match */
394 if (smb->Command == SMB_COM_LOCKING_ANDX) 405 if (mid != smb->Mid) {
395 return 0; 406 cERROR(1, "Mids do not match. received=%u expected=%u",
396 else 407 smb->Mid, mid);
397 cERROR(1, "Received Request not response"); 408 return 1;
398 }
399 } else { /* bad signature or mid */
400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
401 cERROR(1, "Bad protocol string signature header %x",
402 *(unsigned int *) smb->Protocol);
403 if (mid != smb->Mid)
404 cERROR(1, "Mids do not match");
405 } 409 }
406 cERROR(1, "bad smb detected. The Mid=%d", smb->Mid); 410
411 /* if it's a response then accept */
412 if (smb->Flags & SMBFLG_RESPONSE)
413 return 0;
414
415 /* only one valid case where server sends us request */
416 if (smb->Command == SMB_COM_LOCKING_ANDX)
417 return 0;
418
419 cERROR(1, "Server sent request, not response. mid=%u", smb->Mid);
407 return 1; 420 return 1;
408} 421}
409 422
@@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
448 return 1; 461 return 1;
449 } 462 }
450 463
451 if (checkSMBhdr(smb, mid)) 464 if (check_smb_hdr(smb, mid))
452 return 1; 465 return 1;
453 clc_len = smbCalcSize_LE(smb); 466 clc_len = smbCalcSize_LE(smb);
454 467
@@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
465 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) 478 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
466 return 0; /* bcc wrapped */ 479 return 0; /* bcc wrapped */
467 } 480 }
468 cFYI(1, "Calculated size %d vs length %d mismatch for mid %d", 481 cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u",
469 clc_len, 4 + len, smb->Mid); 482 clc_len, 4 + len, smb->Mid);
470 /* Windows XP can return a few bytes too much, presumably 483
471 an illegal pad, at the end of byte range lock responses 484 if (4 + len < clc_len) {
472 so we allow for that three byte pad, as long as actual 485 cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u",
473 received length is as long or longer than calculated length */
474 /* We have now had to extend this more, since there is a
475 case in which it needs to be bigger still to handle a
476 malformed response to transact2 findfirst from WinXP when
477 access denied is returned and thus bcc and wct are zero
478 but server says length is 0x21 bytes too long as if the server
479 forget to reset the smb rfc1001 length when it reset the
480 wct and bcc to minimum size and drop the t2 parms and data */
481 if ((4+len > clc_len) && (len <= clc_len + 512))
482 return 0;
483 else {
484 cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
485 len, smb->Mid); 486 len, smb->Mid);
486 return 1; 487 return 1;
488 } else if (len > clc_len + 512) {
489 /*
490 * Some servers (Windows XP in particular) send more
491 * data than the lengths in the SMB packet would
492 * indicate on certain calls (byte range locks and
493 * trans2 find first calls in particular). While the
494 * client can handle such a frame by ignoring the
495 * trailing data, we choose limit the amount of extra
496 * data to 512 bytes.
497 */
498 cERROR(1, "RFC1001 size %u more than 512 bytes larger "
499 "than SMB for mid=%u", len, smb->Mid);
500 return 1;
487 } 501 }
488 } 502 }
489 return 0; 503 return 0;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 7f25cc3d2256..f8e4cd2a7912 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
764{ 764{
765 int rc = 0; 765 int rc = 0;
766 int xid, i; 766 int xid, i;
767 struct cifs_sb_info *cifs_sb;
768 struct cifsTconInfo *pTcon; 767 struct cifsTconInfo *pTcon;
769 struct cifsFileInfo *cifsFile = NULL; 768 struct cifsFileInfo *cifsFile = NULL;
770 char *current_entry; 769 char *current_entry;
@@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
775 774
776 xid = GetXid(); 775 xid = GetXid();
777 776
778 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
779
780 /* 777 /*
781 * Ensure FindFirst doesn't fail before doing filldir() for '.' and 778 * Ensure FindFirst doesn't fail before doing filldir() for '.' and
782 * '..'. Otherwise we won't be able to notify VFS in case of failure. 779 * '..'. Otherwise we won't be able to notify VFS in case of failure.
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index b6b6dcb500bf..04721485925d 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -45,7 +45,6 @@
45 up with a different answer to the one above) 45 up with a different answer to the one above)
46*/ 46*/
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include "cifsencrypt.h"
49#define uchar unsigned char 48#define uchar unsigned char
50 49
51static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9, 50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 192ea51af20f..b5041c849981 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -32,9 +32,8 @@
32#include "cifs_unicode.h" 32#include "cifs_unicode.h"
33#include "cifspdu.h" 33#include "cifspdu.h"
34#include "cifsglob.h" 34#include "cifsglob.h"
35#include "md5.h"
36#include "cifs_debug.h" 35#include "cifs_debug.h"
37#include "cifsencrypt.h" 36#include "cifsproto.h"
38 37
39#ifndef false 38#ifndef false
40#define false 0 39#define false 0
@@ -48,14 +47,58 @@
48#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
49#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
50 49
51/*The following definitions come from libsmb/smbencrypt.c */ 50/* produce a md4 message digest from data of length n bytes */
51int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
53{
54 int rc;
55 unsigned int size;
56 struct crypto_shash *md4;
57 struct sdesc *sdescmd4;
58
59 md4 = crypto_alloc_shash("md4", 0, 0);
60 if (IS_ERR(md4)) {
61 rc = PTR_ERR(md4);
62 cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc);
63 return rc;
64 }
65 size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
66 sdescmd4 = kmalloc(size, GFP_KERNEL);
67 if (!sdescmd4) {
68 rc = -ENOMEM;
69 cERROR(1, "%s: Memory allocation failure\n", __func__);
70 goto mdfour_err;
71 }
72 sdescmd4->shash.tfm = md4;
73 sdescmd4->shash.flags = 0x0;
74
75 rc = crypto_shash_init(&sdescmd4->shash);
76 if (rc) {
77 cERROR(1, "%s: Could not init md4 shash\n", __func__);
78 goto mdfour_err;
79 }
80 crypto_shash_update(&sdescmd4->shash, link_str, link_len);
81 rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
52 82
53void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 83mdfour_err:
54 unsigned char *p24); 84 crypto_free_shash(md4);
55void E_md4hash(const unsigned char *passwd, unsigned char *p16); 85 kfree(sdescmd4);
56static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8, 86
57 unsigned char p24[24]); 87 return rc;
58void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); 88}
89
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
59 102
60/* 103/*
61 This implements the X/Open SMB password encryption 104 This implements the X/Open SMB password encryption
@@ -118,9 +161,10 @@ _my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
118 * Creates the MD4 Hash of the users password in NT UNICODE. 161 * Creates the MD4 Hash of the users password in NT UNICODE.
119 */ 162 */
120 163
121void 164int
122E_md4hash(const unsigned char *passwd, unsigned char *p16) 165E_md4hash(const unsigned char *passwd, unsigned char *p16)
123{ 166{
167 int rc;
124 int len; 168 int len;
125 __u16 wpwd[129]; 169 __u16 wpwd[129];
126 170
@@ -139,8 +183,10 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
139 /* Calculate length in bytes */ 183 /* Calculate length in bytes */
140 len = _my_wcslen(wpwd) * sizeof(__u16); 184 len = _my_wcslen(wpwd) * sizeof(__u16);
141 185
142 mdfour(p16, (unsigned char *) wpwd, len); 186 rc = mdfour(p16, (unsigned char *) wpwd, len);
143 memset(wpwd, 0, 129 * 2); 187 memset(wpwd, 0, 129 * 2);
188
189 return rc;
144} 190}
145 191
146#if 0 /* currently unused */ 192#if 0 /* currently unused */
@@ -212,19 +258,6 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n,
212} 258}
213#endif 259#endif
214 260
215/* Does the des encryption from the NT or LM MD4 hash. */
216static void
217SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
218 unsigned char p24[24])
219{
220 unsigned char p21[21];
221
222 memset(p21, '\0', 21);
223
224 memcpy(p21, passwd, 16);
225 E_P24(p21, c8, p24);
226}
227
228/* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ 261/* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
229#if 0 /* currently unused */ 262#if 0 /* currently unused */
230static void 263static void
@@ -242,16 +275,21 @@ NTLMSSPOWFencrypt(unsigned char passwd[8],
242#endif 275#endif
243 276
244/* Does the NT MD4 hash then des encryption. */ 277/* Does the NT MD4 hash then des encryption. */
245 278int
246void
247SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
248{ 280{
281 int rc;
249 unsigned char p21[21]; 282 unsigned char p21[21];
250 283
251 memset(p21, '\0', 21); 284 memset(p21, '\0', 21);
252 285
253 E_md4hash(passwd, p21); 286 rc = E_md4hash(passwd, p21);
287 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc;
290 }
254 SMBOWFencrypt(p21, c8, p24); 291 SMBOWFencrypt(p21, c8, p24);
292 return rc;
255} 293}
256 294
257 295
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1ccca1a933f..46d8756f2b24 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
236 server->tcpStatus = CifsNeedReconnect; 236 server->tcpStatus = CifsNeedReconnect;
237 } 237 }
238 238
239 if (rc < 0) { 239 if (rc < 0 && rc != -EINTR)
240 cERROR(1, "Error %d sending data on socket to server", rc); 240 cERROR(1, "Error %d sending data on socket to server", rc);
241 } else 241 else
242 rc = 0; 242 rc = 0;
243 243
244 /* Don't want to modify the buffer as a 244 /* Don't want to modify the buffer as a
@@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
359 if (rc) 359 if (rc)
360 return rc; 360 return rc;
361 361
362 /* enable signing if server requires it */
363 if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
364 in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
365
362 mutex_lock(&server->srv_mutex); 366 mutex_lock(&server->srv_mutex);
363 mid = AllocMidQEntry(in_buf, server); 367 mid = AllocMidQEntry(in_buf, server);
364 if (mid == NULL) { 368 if (mid == NULL) {
@@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
453 case MID_RETRY_NEEDED: 457 case MID_RETRY_NEEDED:
454 rc = -EAGAIN; 458 rc = -EAGAIN;
455 break; 459 break;
460 case MID_RESPONSE_MALFORMED:
461 rc = -EIO;
462 break;
456 default: 463 default:
457 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 464 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
458 mid->mid, mid->midState); 465 mid->mid, mid->midState);
@@ -570,17 +577,33 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
570#endif 577#endif
571 578
572 mutex_unlock(&ses->server->srv_mutex); 579 mutex_unlock(&ses->server->srv_mutex);
573 cifs_small_buf_release(in_buf);
574 580
575 if (rc < 0) 581 if (rc < 0) {
582 cifs_small_buf_release(in_buf);
576 goto out; 583 goto out;
584 }
577 585
578 if (long_op == CIFS_ASYNC_OP) 586 if (long_op == CIFS_ASYNC_OP) {
587 cifs_small_buf_release(in_buf);
579 goto out; 588 goto out;
589 }
580 590
581 rc = wait_for_response(ses->server, midQ); 591 rc = wait_for_response(ses->server, midQ);
582 if (rc != 0) 592 if (rc != 0) {
583 goto out; 593 send_nt_cancel(ses->server, in_buf, midQ);
594 spin_lock(&GlobalMid_Lock);
595 if (midQ->midState == MID_REQUEST_SUBMITTED) {
596 midQ->callback = DeleteMidQEntry;
597 spin_unlock(&GlobalMid_Lock);
598 cifs_small_buf_release(in_buf);
599 atomic_dec(&ses->server->inFlight);
600 wake_up(&ses->server->request_q);
601 return rc;
602 }
603 spin_unlock(&GlobalMid_Lock);
604 }
605
606 cifs_small_buf_release(in_buf);
584 607
585 rc = sync_mid_result(midQ, ses->server); 608 rc = sync_mid_result(midQ, ses->server);
586 if (rc != 0) { 609 if (rc != 0) {
@@ -724,8 +747,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
724 goto out; 747 goto out;
725 748
726 rc = wait_for_response(ses->server, midQ); 749 rc = wait_for_response(ses->server, midQ);
727 if (rc != 0) 750 if (rc != 0) {
728 goto out; 751 send_nt_cancel(ses->server, in_buf, midQ);
752 spin_lock(&GlobalMid_Lock);
753 if (midQ->midState == MID_REQUEST_SUBMITTED) {
754 /* no longer considered to be "in-flight" */
755 midQ->callback = DeleteMidQEntry;
756 spin_unlock(&GlobalMid_Lock);
757 atomic_dec(&ses->server->inFlight);
758 wake_up(&ses->server->request_q);
759 return rc;
760 }
761 spin_unlock(&GlobalMid_Lock);
762 }
729 763
730 rc = sync_mid_result(midQ, ses->server); 764 rc = sync_mid_result(midQ, ses->server);
731 if (rc != 0) { 765 if (rc != 0) {
@@ -922,10 +956,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
922 } 956 }
923 } 957 }
924 958
925 if (wait_for_response(ses->server, midQ) == 0) { 959 rc = wait_for_response(ses->server, midQ);
926 /* We got the response - restart system call. */ 960 if (rc) {
927 rstart = 1; 961 send_nt_cancel(ses->server, in_buf, midQ);
962 spin_lock(&GlobalMid_Lock);
963 if (midQ->midState == MID_REQUEST_SUBMITTED) {
964 /* no longer considered to be "in-flight" */
965 midQ->callback = DeleteMidQEntry;
966 spin_unlock(&GlobalMid_Lock);
967 return rc;
968 }
969 spin_unlock(&GlobalMid_Lock);
928 } 970 }
971
972 /* We got the response - restart system call. */
973 rstart = 1;
929 } 974 }
930 975
931 rc = sync_mid_result(midQ, ses->server); 976 rc = sync_mid_result(midQ, ses->server);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1a..2d8c87b951c2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
1468 1468
1469static int work_start(void) 1469static int work_start(void)
1470{ 1470{
1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | 1471 recv_workqueue = create_singlethread_workqueue("dlm_recv");
1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1473 if (!recv_workqueue) { 1472 if (!recv_workqueue) {
1474 log_print("can't start dlm_recv"); 1473 log_print("can't start dlm_recv");
1475 return -ENOMEM; 1474 return -ENOMEM;
1476 } 1475 }
1477 1476
1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | 1477 send_workqueue = create_singlethread_workqueue("dlm_send");
1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1480 if (!send_workqueue) { 1478 if (!send_workqueue) {
1481 log_print("can't start dlm_send"); 1479 log_print("can't start dlm_send");
1482 destroy_workqueue(recv_workqueue); 1480 destroy_workqueue(recv_workqueue);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..267d0ada4541 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1114,6 +1114,17 @@ static int ep_send_events(struct eventpoll *ep,
1114 return ep_scan_ready_list(ep, ep_send_events_proc, &esed); 1114 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1115} 1115}
1116 1116
1117static inline struct timespec ep_set_mstimeout(long ms)
1118{
1119 struct timespec now, ts = {
1120 .tv_sec = ms / MSEC_PER_SEC,
1121 .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
1122 };
1123
1124 ktime_get_ts(&now);
1125 return timespec_add_safe(now, ts);
1126}
1127
1117static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1128static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1118 int maxevents, long timeout) 1129 int maxevents, long timeout)
1119{ 1130{
@@ -1121,12 +1132,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1121 unsigned long flags; 1132 unsigned long flags;
1122 long slack; 1133 long slack;
1123 wait_queue_t wait; 1134 wait_queue_t wait;
1124 struct timespec end_time;
1125 ktime_t expires, *to = NULL; 1135 ktime_t expires, *to = NULL;
1126 1136
1127 if (timeout > 0) { 1137 if (timeout > 0) {
1128 ktime_get_ts(&end_time); 1138 struct timespec end_time = ep_set_mstimeout(timeout);
1129 timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); 1139
1130 slack = select_estimate_accuracy(&end_time); 1140 slack = select_estimate_accuracy(&end_time);
1131 to = &expires; 1141 to = &expires;
1132 *to = timespec_to_ktime(end_time); 1142 *to = timespec_to_ktime(end_time);
diff --git a/fs/exec.c b/fs/exec.c
index c62efcb959c7..52a447d9b6ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
120 goto out; 120 goto out;
121 121
122 file = do_filp_open(AT_FDCWD, tmp, 122 file = do_filp_open(AT_FDCWD, tmp,
123 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN); 124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 125 putname(tmp);
126 error = PTR_ERR(file); 126 error = PTR_ERR(file);
@@ -723,7 +723,7 @@ struct file *open_exec(const char *name)
723 int err; 723 int err;
724 724
725 file = do_filp_open(AT_FDCWD, name, 725 file = do_filp_open(AT_FDCWD, name,
726 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN); 727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 728 if (IS_ERR(file))
729 goto out; 729 goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 42685424817b..a7555238c41a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1031 }
1032 1032
1033 inode->i_mapping->backing_dev_info = sb->s_bdi;
1034 if (S_ISREG(inode->i_mode)) { 1033 if (S_ISREG(inode->i_mode)) {
1035 inode->i_op = &exofs_file_inode_operations; 1034 inode->i_op = &exofs_file_inode_operations;
1036 inode->i_fop = &exofs_file_operations; 1035 inode->i_fop = &exofs_file_operations;
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1131 1130
1132 sbi = sb->s_fs_info; 1131 sbi = sb->s_fs_info;
1133 1132
1134 inode->i_mapping->backing_dev_info = sb->s_bdi;
1135 sb->s_dirt = 1; 1133 sb->s_dirt = 1;
1136 inode_init_owner(inode, dir, mode); 1134 inode_init_owner(inode, dir, mode);
1137 inode->i_ino = sbi->s_nextid++; 1135 inode->i_ino = sbi->s_nextid++;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f34..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
849 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
850 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
851 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
851 852
852 spinlock_t i_block_reservation_lock; 853 spinlock_t i_block_reservation_lock;
853 854
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2119 2120
2120#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2121#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2121 2122
2123/* For ioend & aio unwritten conversion wait queues */
2124#define EXT4_WQ_HASH_SZ 37
2125#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2126 EXT4_WQ_HASH_SZ])
2127#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2128 EXT4_WQ_HASH_SZ])
2129extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2130extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2131
2122#endif /* __KERNEL__ */ 2132#endif /* __KERNEL__ */
2123 2133
2124#endif /* _EXT4_H */ 2134#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c3..ccce8a7e94ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 * that this IO needs to convertion to written when IO is 3174 * that this IO needs to convertion to written when IO is
3175 * completed 3175 * completed
3176 */ 3176 */
3177 if (io) 3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3178 io->flag = EXT4_IO_END_UNWRITTEN; 3178 io->flag = EXT4_IO_END_UNWRITTEN;
3179 else 3179 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3180 } else
3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3181 if (ext4_should_dioread_nolock(inode)) 3182 if (ext4_should_dioread_nolock(inode))
3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3463 * that we need to perform convertion when IO is done. 3464 * that we need to perform convertion when IO is done.
3464 */ 3465 */
3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3466 if (io) 3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 io->flag = EXT4_IO_END_UNWRITTEN;
3468 else 3469 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3470 } else
3469 ext4_set_inode_state(inode, 3471 ext4_set_inode_state(inode,
3470 EXT4_STATE_DIO_UNWRITTEN); 3472 EXT4_STATE_DIO_UNWRITTEN);
3471 } 3473 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa88..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode)
59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
63}
64
65/*
66 * This tests whether the IO in question is block-aligned or not.
67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
68 * are converted to written only after the IO is complete. Until they are
69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
70 * it needs to zero out portions of the start and/or end block. If 2 AIO
71 * threads are at work on the same unwritten block, they must be synchronized
72 * or one thread will zero the other's data, causing corruption.
73 */
74static int
75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
76 unsigned long nr_segs, loff_t pos)
77{
78 struct super_block *sb = inode->i_sb;
79 int blockmask = sb->s_blocksize - 1;
80 size_t count = iov_length(iov, nr_segs);
81 loff_t final_size = pos + count;
82
83 if (pos >= inode->i_size)
84 return 0;
85
86 if ((pos & blockmask) || (final_size & blockmask))
87 return 1;
88
89 return 0;
90}
91
58static ssize_t 92static ssize_t
59ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
60 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
61{ 95{
62 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
97 int unaligned_aio = 0;
98 int ret;
63 99
64 /* 100 /*
65 * If we have encountered a bitmap-format file, the size limit 101 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
78 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 114 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
79 sbi->s_bitmap_maxbytes - pos); 115 sbi->s_bitmap_maxbytes - pos);
80 } 116 }
117 } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
118 !is_sync_kiocb(iocb))) {
119 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
81 } 120 }
82 121
83 return generic_file_aio_write(iocb, iov, nr_segs, pos); 122 /* Unaligned direct AIO must be serialized; see comment above */
123 if (unaligned_aio) {
124 static unsigned long unaligned_warn_time;
125
126 /* Warn about this once per day */
127 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
128 ext4_msg(inode->i_sb, KERN_WARNING,
129 "Unaligned AIO/DIO on inode %ld by %s; "
130 "performance will be poor.",
131 inode->i_ino, current->comm);
132 mutex_lock(ext4_aio_mutex(inode));
133 ext4_aiodio_wait(inode);
134 }
135
136 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
137
138 if (unaligned_aio)
139 mutex_unlock(ext4_aio_mutex(inode));
140
141 return ret;
84} 142}
85 143
86static const struct vm_operations_struct ext4_file_vm_ops = { 144static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
342/* We create slab caches for groupinfo data structures based on the 342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for 343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */ 344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \ 345#define NR_GRPINFO_CACHES 8
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; 346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348 347
348static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
350 ext4_group_t group); 355 ext4_group_t group);
351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
2414 return -ENOMEM; 2419 return -ENOMEM;
2415} 2420}
2416 2421
2422static void ext4_groupinfo_destroy_slabs(void)
2423{
2424 int i;
2425
2426 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2427 if (ext4_groupinfo_caches[i])
2428 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2429 ext4_groupinfo_caches[i] = NULL;
2430 }
2431}
2432
2433static int ext4_groupinfo_create_slab(size_t size)
2434{
2435 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2436 int slab_size;
2437 int blocksize_bits = order_base_2(size);
2438 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2439 struct kmem_cache *cachep;
2440
2441 if (cache_index >= NR_GRPINFO_CACHES)
2442 return -EINVAL;
2443
2444 if (unlikely(cache_index < 0))
2445 cache_index = 0;
2446
2447 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2448 if (ext4_groupinfo_caches[cache_index]) {
2449 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2450 return 0; /* Already created */
2451 }
2452
2453 slab_size = offsetof(struct ext4_group_info,
2454 bb_counters[blocksize_bits + 2]);
2455
2456 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2457 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2458 NULL);
2459
2460 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2461 if (!cachep) {
2462 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2463 return -ENOMEM;
2464 }
2465
2466 ext4_groupinfo_caches[cache_index] = cachep;
2467
2468 return 0;
2469}
2470
2417int ext4_mb_init(struct super_block *sb, int needs_recovery) 2471int ext4_mb_init(struct super_block *sb, int needs_recovery)
2418{ 2472{
2419 struct ext4_sb_info *sbi = EXT4_SB(sb); 2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2421 unsigned offset; 2475 unsigned offset;
2422 unsigned max; 2476 unsigned max;
2423 int ret; 2477 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2427 2478
2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2479 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2429 2480
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 goto out; 2491 goto out;
2441 } 2492 }
2442 2493
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; 2494 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2444 cachep = ext4_groupinfo_caches[cache_index]; 2495 if (ret < 0)
2445 if (!cachep) { 2496 goto out;
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2466 }
2467 2497
2468 /* order 0 is regular bitmap */ 2498 /* order 0 is regular bitmap */
2469 sbi->s_mb_maxs[0] = sb->s_blocksize << 3; 2499 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
2520 if (ret) { 2550 if (ret) {
2521 kfree(sbi->s_mb_offsets); 2551 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs); 2552 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 } 2553 }
2525 return ret; 2554 return ret;
2526} 2555}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
2734 2763
2735void ext4_exit_mballoc(void) 2764void ext4_exit_mballoc(void)
2736{ 2765{
2737 int i;
2738 /* 2766 /*
2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2767 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2740 * before destroying the slab cache. 2768 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
2743 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2744 kmem_cache_destroy(ext4_ac_cachep); 2772 kmem_cache_destroy(ext4_ac_cachep);
2745 kmem_cache_destroy(ext4_free_ext_cachep); 2773 kmem_cache_destroy(ext4_free_ext_cachep);
2746 2774 ext4_groupinfo_destroy_slabs();
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2755 ext4_remove_debugfs_entry(); 2775 ext4_remove_debugfs_entry();
2756} 2776}
2757 2777
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
39int __init ext4_init_pageio(void) 35int __init ext4_init_pageio(void)
40{ 36{
41 int i;
42
43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
44 if (io_page_cachep == NULL) 38 if (io_page_cachep == NULL)
45 return -ENOMEM; 39 return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
48 kmem_cache_destroy(io_page_cachep); 42 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 43 return -ENOMEM;
50 } 44 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
53
54 return 0; 45 return 0;
55} 46}
56 47
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
62 53
63void ext4_ioend_wait(struct inode *inode) 54void ext4_ioend_wait(struct inode *inode)
64{ 55{
65 wait_queue_head_t *wq = to_ioend_wq(inode); 56 wait_queue_head_t *wq = ext4_ioend_wq(inode);
66 57
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 58 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68} 59}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
87 for (i = 0; i < io->num_io_pages; i++) 78 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]); 79 put_io_page(io->pages[i]);
89 io->num_io_pages = 0; 80 io->num_io_pages = 0;
90 wq = to_ioend_wq(io->inode); 81 wq = ext4_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq)) 83 waitqueue_active(wq))
93 wake_up_all(wq); 84 wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
102 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
103 loff_t offset = io->offset; 94 loff_t offset = io->offset;
104 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
105 int ret = 0; 97 int ret = 0;
106 98
107 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
126 if (io->iocb) 118 if (io->iocb)
127 aio_complete(io->iocb, io->result, 0); 119 aio_complete(io->iocb, io->result, 0);
128 /* clear the DIO AIO unwritten flag */ 120 /* clear the DIO AIO unwritten flag */
129 io->flag &= ~EXT4_IO_END_UNWRITTEN; 121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130
130 return ret; 131 return ret;
131} 132}
132 133
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 191 struct inode *inode;
191 unsigned long flags; 192 unsigned long flags;
192 int i; 193 int i;
194 sector_t bi_sector = bio->bi_sector;
193 195
194 BUG_ON(!io_end); 196 BUG_ON(!io_end);
195 bio->bi_private = NULL; 197 bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 209 if (error)
208 SetPageError(page); 210 SetPageError(page);
209 BUG_ON(!head); 211 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 212 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 213 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 214 loff_t io_end_offset = io_end->offset + io_end->size;
215 215
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 221 if (error)
222 buffer_io_error(bh); 222 buffer_io_error(bh);
223 223
224 clear_buffer_dirty(bh);
225 } 224 }
226 if (buffer_delay(bh)) 225 if (buffer_delay(bh))
227 partial_write = 1; 226 partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 256 (unsigned long long) io_end->offset,
258 (long) io_end->size, 257 (long) io_end->size,
259 (unsigned long long) 258 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
261 } 260 }
262 261
263 /* Add the io_end to per-inode completed io list*/ 262 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 379
381 blocksize = 1 << inode->i_blkbits; 380 blocksize = 1 << inode->i_blkbits;
382 381
382 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 383 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 384 set_page_writeback(page);
385 ClearPageError(page); 385 ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 397 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 398 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 399 block_start = block_end, bh = bh->b_this_page) {
400
400 block_end = block_start + blocksize; 401 block_end = block_start + blocksize;
401 if (block_start >= len) { 402 if (block_start >= len) {
402 clear_buffer_dirty(bh); 403 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 404 set_buffer_uptodate(bh);
404 continue; 405 continue;
405 } 406 }
407 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 408 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 409 if (ret) {
408 /* 410 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafac..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void);
80 81
81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
82static struct file_system_type ext3_fs_type = { 83static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
832 ei->i_sync_tid = 0; 833 ei->i_sync_tid = 0;
833 ei->i_datasync_tid = 0; 834 ei->i_datasync_tid = 0;
834 atomic_set(&ei->i_ioend_count, 0); 835 atomic_set(&ei->i_ioend_count, 0);
836 atomic_set(&ei->i_aiodio_unwritten, 0);
835 837
836 return &ei->vfs_inode; 838 return &ei->vfs_inode;
837} 839}
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2718 mutex_unlock(&ext4_li_info->li_list_mtx);
2717} 2719}
2718 2720
2721static struct task_struct *ext4_lazyinit_task;
2722
2719/* 2723/*
2720 * This is the function where ext4lazyinit thread lives. It walks 2724 * This is the function where ext4lazyinit thread lives. It walks
2721 * through the request list searching for next scheduled filesystem. 2725 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
2784 if (time_before(jiffies, next_wakeup)) 2788 if (time_before(jiffies, next_wakeup))
2785 schedule(); 2789 schedule();
2786 finish_wait(&eli->li_wait_daemon, &wait); 2790 finish_wait(&eli->li_wait_daemon, &wait);
2791 if (kthread_should_stop()) {
2792 ext4_clear_request_list();
2793 goto exit_thread;
2794 }
2787 } 2795 }
2788 2796
2789exit_thread: 2797exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
2808 wake_up(&eli->li_wait_task); 2816 wake_up(&eli->li_wait_task);
2809 2817
2810 kfree(ext4_li_info); 2818 kfree(ext4_li_info);
2819 ext4_lazyinit_task = NULL;
2811 ext4_li_info = NULL; 2820 ext4_li_info = NULL;
2812 mutex_unlock(&ext4_li_mtx); 2821 mutex_unlock(&ext4_li_mtx);
2813 2822
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
2830 2839
2831static int ext4_run_lazyinit_thread(void) 2840static int ext4_run_lazyinit_thread(void)
2832{ 2841{
2833 struct task_struct *t; 2842 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2834 2843 ext4_li_info, "ext4lazyinit");
2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2844 if (IS_ERR(ext4_lazyinit_task)) {
2836 if (IS_ERR(t)) { 2845 int err = PTR_ERR(ext4_lazyinit_task);
2837 int err = PTR_ERR(t);
2838 ext4_clear_request_list(); 2846 ext4_clear_request_list();
2839 del_timer_sync(&ext4_li_info->li_timer); 2847 del_timer_sync(&ext4_li_info->li_timer);
2840 kfree(ext4_li_info); 2848 kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
2985 * If thread exited earlier 2993 * If thread exited earlier
2986 * there's nothing to be done. 2994 * there's nothing to be done.
2987 */ 2995 */
2988 if (!ext4_li_info) 2996 if (!ext4_li_info || !ext4_lazyinit_task)
2989 return; 2997 return;
2990 2998
2991 ext4_clear_request_list(); 2999 kthread_stop(ext4_lazyinit_task);
2992
2993 while (ext4_li_info->li_task) {
2994 wake_up(&ext4_li_info->li_wait_daemon);
2995 wait_event(ext4_li_info->li_wait_task,
2996 ext4_li_info->li_task == NULL);
2997 }
2998} 3000}
2999 3001
3000static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3002static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
4768 .fs_flags = FS_REQUIRES_DEV, 4770 .fs_flags = FS_REQUIRES_DEV,
4769}; 4771};
4770 4772
4771int __init ext4_init_feat_adverts(void) 4773static int __init ext4_init_feat_adverts(void)
4772{ 4774{
4773 struct ext4_features *ef; 4775 struct ext4_features *ef;
4774 int ret = -ENOMEM; 4776 int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
4792 return ret; 4794 return ret;
4793} 4795}
4794 4796
4797static void ext4_exit_feat_adverts(void)
4798{
4799 kobject_put(&ext4_feat->f_kobj);
4800 wait_for_completion(&ext4_feat->f_kobj_unregister);
4801 kfree(ext4_feat);
4802}
4803
4804/* Shared across all ext4 file systems */
4805wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
4806struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
4807
4795static int __init ext4_init_fs(void) 4808static int __init ext4_init_fs(void)
4796{ 4809{
4797 int err; 4810 int i, err;
4798 4811
4799 ext4_check_flag_values(); 4812 ext4_check_flag_values();
4813
4814 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
4815 mutex_init(&ext4__aio_mutex[i]);
4816 init_waitqueue_head(&ext4__ioend_wq[i]);
4817 }
4818
4800 err = ext4_init_pageio(); 4819 err = ext4_init_pageio();
4801 if (err) 4820 if (err)
4802 return err; 4821 return err;
4803 err = ext4_init_system_zone(); 4822 err = ext4_init_system_zone();
4804 if (err) 4823 if (err)
4805 goto out5; 4824 goto out7;
4806 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4825 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4807 if (!ext4_kset) 4826 if (!ext4_kset)
4808 goto out4; 4827 goto out6;
4809 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4828 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4829 if (!ext4_proc_root)
4830 goto out5;
4810 4831
4811 err = ext4_init_feat_adverts(); 4832 err = ext4_init_feat_adverts();
4833 if (err)
4834 goto out4;
4812 4835
4813 err = ext4_init_mballoc(); 4836 err = ext4_init_mballoc();
4814 if (err) 4837 if (err)
@@ -4838,12 +4861,14 @@ out1:
4838out2: 4861out2:
4839 ext4_exit_mballoc(); 4862 ext4_exit_mballoc();
4840out3: 4863out3:
4841 kfree(ext4_feat); 4864 ext4_exit_feat_adverts();
4865out4:
4842 remove_proc_entry("fs/ext4", NULL); 4866 remove_proc_entry("fs/ext4", NULL);
4867out5:
4843 kset_unregister(ext4_kset); 4868 kset_unregister(ext4_kset);
4844out4: 4869out6:
4845 ext4_exit_system_zone(); 4870 ext4_exit_system_zone();
4846out5: 4871out7:
4847 ext4_exit_pageio(); 4872 ext4_exit_pageio();
4848 return err; 4873 return err;
4849} 4874}
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
4857 destroy_inodecache(); 4882 destroy_inodecache();
4858 ext4_exit_xattr(); 4883 ext4_exit_xattr();
4859 ext4_exit_mballoc(); 4884 ext4_exit_mballoc();
4885 ext4_exit_feat_adverts();
4860 remove_proc_entry("fs/ext4", NULL); 4886 remove_proc_entry("fs/ext4", NULL);
4861 kset_unregister(ext4_kset); 4887 kset_unregister(ext4_kset);
4862 ext4_exit_system_zone(); 4888 ext4_exit_system_zone();
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ecc8b3954ed6..cb1026181bdc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -815,7 +815,7 @@ static int __init fcntl_init(void)
815 __O_SYNC | O_DSYNC | FASYNC | 815 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 816 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 FMODE_EXEC 818 __FMODE_EXEC
819 )); 819 ));
820 820
821 fasync_cache = kmem_cache_create("fasync_cache", 821 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file_table.c b/fs/file_table.c
index c3e89adf53c0..eb36b6b17e26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void)
125 goto fail; 125 goto fail;
126 126
127 percpu_counter_inc(&nr_files); 127 percpu_counter_inc(&nr_files);
128 f->f_cred = get_cred(cred);
128 if (security_file_alloc(f)) 129 if (security_file_alloc(f))
129 goto fail_sec; 130 goto fail_sec;
130 131
131 INIT_LIST_HEAD(&f->f_u.fu_list); 132 INIT_LIST_HEAD(&f->f_u.fu_list);
132 atomic_long_set(&f->f_count, 1); 133 atomic_long_set(&f->f_count, 1);
133 rwlock_init(&f->f_owner.lock); 134 rwlock_init(&f->f_owner.lock);
134 f->f_cred = get_cred(cred);
135 spin_lock_init(&f->f_lock); 135 spin_lock_init(&f->f_lock);
136 eventpoll_init_file(f); 136 eventpoll_init_file(f);
137 /* f->f_version: 0 */ 137 /* f->f_version: 0 */
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..7cd9a5a68d59 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void)
1779#endif 1779#endif
1780 1780
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1783 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1784 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 1786 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1787 0); 1787 0);
1788 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1789 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..85ba027d1c4d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -144,7 +144,7 @@ static int __init init_gfs2_fs(void)
144 144
145 error = -ENOMEM; 145 error = -ENOMEM;
146 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 146 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
147 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); 147 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
148 if (!gfs_recovery_wq) 148 if (!gfs_recovery_wq)
149 goto fail_wq; 149 goto fail_wq;
150 150
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 52a0bcaa7b6d..b1991a2a08e0 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode)
397 u32 start, len, goal; 397 u32 start, len, goal;
398 int res; 398 int res;
399 399
400 if (sbi->total_blocks - sbi->free_blocks + 8 > 400 if (sbi->alloc_file->i_size * 8 <
401 sbi->alloc_file->i_size * 8) { 401 sbi->total_blocks - sbi->free_blocks + 8) {
402 /* extend alloc file */ 402 /* extend alloc file */
403 printk(KERN_ERR "hfs: extend alloc file! " 403 printk(KERN_ERR "hfs: extend alloc file! "
404 "(%llu,%u,%u)\n", 404 "(%llu,%u,%u)\n",
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index d66ad113b1cc..40ad88c12c64 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb,
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
135 data, READ); 135 data, READ);
136 if (res) 136 if (res)
137 return res; 137 goto out;
138 138
139 switch (be16_to_cpu(*((__be16 *)data))) { 139 switch (be16_to_cpu(*((__be16 *)data))) {
140 case HFS_OLD_PMAP_MAGIC: 140 case HFS_OLD_PMAP_MAGIC:
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb,
147 res = -ENOENT; 147 res = -ENOENT;
148 break; 148 break;
149 } 149 }
150 150out:
151 kfree(data); 151 kfree(data);
152 return res; 152 return res;
153} 153}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a3b4795f43c..b49b55584c84 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
338 struct inode *root, *inode; 338 struct inode *root, *inode;
339 struct qstr str; 339 struct qstr str;
340 struct nls_table *nls = NULL; 340 struct nls_table *nls = NULL;
341 int err = -EINVAL; 341 int err;
342 342
343 err = -EINVAL;
343 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 344 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
344 if (!sbi) 345 if (!sbi)
345 return -ENOMEM; 346 goto out;
346 347
347 sb->s_fs_info = sbi; 348 sb->s_fs_info = sbi;
348 mutex_init(&sbi->alloc_mutex); 349 mutex_init(&sbi->alloc_mutex);
349 mutex_init(&sbi->vh_mutex); 350 mutex_init(&sbi->vh_mutex);
350 hfsplus_fill_defaults(sbi); 351 hfsplus_fill_defaults(sbi);
352
353 err = -EINVAL;
351 if (!hfsplus_parse_options(data, sbi)) { 354 if (!hfsplus_parse_options(data, sbi)) {
352 printk(KERN_ERR "hfs: unable to parse mount options\n"); 355 printk(KERN_ERR "hfs: unable to parse mount options\n");
353 err = -EINVAL; 356 goto out_unload_nls;
354 goto cleanup;
355 } 357 }
356 358
357 /* temporarily use utf8 to correctly find the hidden dir below */ 359 /* temporarily use utf8 to correctly find the hidden dir below */
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
359 sbi->nls = load_nls("utf8"); 361 sbi->nls = load_nls("utf8");
360 if (!sbi->nls) { 362 if (!sbi->nls) {
361 printk(KERN_ERR "hfs: unable to load nls for utf8\n"); 363 printk(KERN_ERR "hfs: unable to load nls for utf8\n");
362 err = -EINVAL; 364 goto out_unload_nls;
363 goto cleanup;
364 } 365 }
365 366
366 /* Grab the volume header */ 367 /* Grab the volume header */
367 if (hfsplus_read_wrapper(sb)) { 368 if (hfsplus_read_wrapper(sb)) {
368 if (!silent) 369 if (!silent)
369 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); 370 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
370 err = -EINVAL; 371 goto out_unload_nls;
371 goto cleanup;
372 } 372 }
373 vhdr = sbi->s_vhdr; 373 vhdr = sbi->s_vhdr;
374 374
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || 377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { 378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
379 printk(KERN_ERR "hfs: wrong filesystem version\n"); 379 printk(KERN_ERR "hfs: wrong filesystem version\n");
380 goto cleanup; 380 goto out_free_vhdr;
381 } 381 }
382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); 382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); 383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
422 if (!sbi->ext_tree) { 422 if (!sbi->ext_tree) {
423 printk(KERN_ERR "hfs: failed to load extents file\n"); 423 printk(KERN_ERR "hfs: failed to load extents file\n");
424 goto cleanup; 424 goto out_free_vhdr;
425 } 425 }
426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); 426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
427 if (!sbi->cat_tree) { 427 if (!sbi->cat_tree) {
428 printk(KERN_ERR "hfs: failed to load catalog file\n"); 428 printk(KERN_ERR "hfs: failed to load catalog file\n");
429 goto cleanup; 429 goto out_close_ext_tree;
430 } 430 }
431 431
432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); 432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
433 if (IS_ERR(inode)) { 433 if (IS_ERR(inode)) {
434 printk(KERN_ERR "hfs: failed to load allocation file\n"); 434 printk(KERN_ERR "hfs: failed to load allocation file\n");
435 err = PTR_ERR(inode); 435 err = PTR_ERR(inode);
436 goto cleanup; 436 goto out_close_cat_tree;
437 } 437 }
438 sbi->alloc_file = inode; 438 sbi->alloc_file = inode;
439 439
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
442 if (IS_ERR(root)) { 442 if (IS_ERR(root)) {
443 printk(KERN_ERR "hfs: failed to load root directory\n"); 443 printk(KERN_ERR "hfs: failed to load root directory\n");
444 err = PTR_ERR(root); 444 err = PTR_ERR(root);
445 goto cleanup; 445 goto out_put_alloc_file;
446 }
447 sb->s_d_op = &hfsplus_dentry_operations;
448 sb->s_root = d_alloc_root(root);
449 if (!sb->s_root) {
450 iput(root);
451 err = -ENOMEM;
452 goto cleanup;
453 } 446 }
454 447
455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 448 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
459 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 452 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
460 hfs_find_exit(&fd); 453 hfs_find_exit(&fd);
461 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) 454 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
462 goto cleanup; 455 goto out_put_root;
463 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); 456 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
464 if (IS_ERR(inode)) { 457 if (IS_ERR(inode)) {
465 err = PTR_ERR(inode); 458 err = PTR_ERR(inode);
466 goto cleanup; 459 goto out_put_root;
467 } 460 }
468 sbi->hidden_dir = inode; 461 sbi->hidden_dir = inode;
469 } else 462 } else
470 hfs_find_exit(&fd); 463 hfs_find_exit(&fd);
471 464
472 if (sb->s_flags & MS_RDONLY) 465 if (!(sb->s_flags & MS_RDONLY)) {
473 goto out; 466 /*
467 * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
468 * all three are registered with Apple for our use
469 */
470 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
471 vhdr->modify_date = hfsp_now2mt();
472 be32_add_cpu(&vhdr->write_count, 1);
473 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
474 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
475 hfsplus_sync_fs(sb, 1);
474 476
475 /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused 477 if (!sbi->hidden_dir) {
476 * all three are registered with Apple for our use 478 mutex_lock(&sbi->vh_mutex);
477 */ 479 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
478 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 480 hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str,
479 vhdr->modify_date = hfsp_now2mt(); 481 sbi->hidden_dir);
480 be32_add_cpu(&vhdr->write_count, 1); 482 mutex_unlock(&sbi->vh_mutex);
481 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 483
482 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 484 hfsplus_mark_inode_dirty(sbi->hidden_dir,
483 hfsplus_sync_fs(sb, 1); 485 HFSPLUS_I_CAT_DIRTY);
484 486 }
485 if (!sbi->hidden_dir) {
486 mutex_lock(&sbi->vh_mutex);
487 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
488 hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
489 &str, sbi->hidden_dir);
490 mutex_unlock(&sbi->vh_mutex);
491
492 hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
493 } 487 }
494out: 488
489 sb->s_d_op = &hfsplus_dentry_operations;
490 sb->s_root = d_alloc_root(root);
491 if (!sb->s_root) {
492 err = -ENOMEM;
493 goto out_put_hidden_dir;
494 }
495
495 unload_nls(sbi->nls); 496 unload_nls(sbi->nls);
496 sbi->nls = nls; 497 sbi->nls = nls;
497 return 0; 498 return 0;
498 499
499cleanup: 500out_put_hidden_dir:
500 hfsplus_put_super(sb); 501 iput(sbi->hidden_dir);
502out_put_root:
503 iput(sbi->alloc_file);
504out_put_alloc_file:
505 iput(sbi->alloc_file);
506out_close_cat_tree:
507 hfs_btree_close(sbi->cat_tree);
508out_close_ext_tree:
509 hfs_btree_close(sbi->ext_tree);
510out_free_vhdr:
511 kfree(sbi->s_vhdr);
512 kfree(sbi->s_backup_vhdr);
513out_unload_nls:
514 unload_nls(sbi->nls);
501 unload_nls(nls); 515 unload_nls(nls);
516 kfree(sbi);
517out:
502 return err; 518 return err;
503} 519}
504 520
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 196231794f64..3031d81f5f0f 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -167,7 +167,7 @@ reread:
167 break; 167 break;
168 case cpu_to_be16(HFSP_WRAP_MAGIC): 168 case cpu_to_be16(HFSP_WRAP_MAGIC):
169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) 169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
170 goto out; 170 goto out_free_backup_vhdr;
171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; 171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; 172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
173 part_size = wd.embed_count * wd.ablk_size; 173 part_size = wd.embed_count * wd.ablk_size;
@@ -179,7 +179,7 @@ reread:
179 * (should do this only for cdrom/loop though) 179 * (should do this only for cdrom/loop though)
180 */ 180 */
181 if (hfs_part_find(sb, &part_start, &part_size)) 181 if (hfs_part_find(sb, &part_start, &part_size))
182 goto out; 182 goto out_free_backup_vhdr;
183 goto reread; 183 goto reread;
184 } 184 }
185 185
diff --git a/fs/ioctl.c b/fs/ioctl.c
index a59635e295fa..1eebeb72b202 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode,
273 len = isize; 273 len = isize;
274 } 274 }
275 275
276 /*
277 * Some filesystems can't deal with being asked to map less than
278 * blocksize, so make sure our len is at least block length.
279 */
280 if (logical_to_blk(inode, len) == 0)
281 len = blk_to_logical(inode, 1);
282
276 start_blk = logical_to_blk(inode, start); 283 start_blk = logical_to_blk(inode, start);
277 last_blk = logical_to_blk(inode, start + len - 1); 284 last_blk = logical_to_blk(inode, start + len - 1);
278 285
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..97e73469b2c4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
473} 473}
474 474
475/* 475/*
476 * Called under j_state_lock. Returns true if a transaction commit was started. 476 * Called with j_state_lock locked for writing.
477 * Returns true if a transaction commit was started.
477 */ 478 */
478int __jbd2_log_start_commit(journal_t *journal, tid_t target) 479int __jbd2_log_start_commit(journal_t *journal, tid_t target)
479{ 480{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
520{ 521{
521 transaction_t *transaction = NULL; 522 transaction_t *transaction = NULL;
522 tid_t tid; 523 tid_t tid;
524 int need_to_start = 0;
523 525
524 read_lock(&journal->j_state_lock); 526 read_lock(&journal->j_state_lock);
525 if (journal->j_running_transaction && !current->journal_info) { 527 if (journal->j_running_transaction && !current->journal_info) {
526 transaction = journal->j_running_transaction; 528 transaction = journal->j_running_transaction;
527 __jbd2_log_start_commit(journal, transaction->t_tid); 529 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
530 need_to_start = 1;
528 } else if (journal->j_committing_transaction) 531 } else if (journal->j_committing_transaction)
529 transaction = journal->j_committing_transaction; 532 transaction = journal->j_committing_transaction;
530 533
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
535 538
536 tid = transaction->t_tid; 539 tid = transaction->t_tid;
537 read_unlock(&journal->j_state_lock); 540 read_unlock(&journal->j_state_lock);
541 if (need_to_start)
542 jbd2_log_start_commit(journal, tid);
538 jbd2_log_wait_commit(journal, tid); 543 jbd2_log_wait_commit(journal, tid);
539 return 1; 544 return 1;
540} 545}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..1d1191050f99 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
117static int start_this_handle(journal_t *journal, handle_t *handle, 117static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 118 int gfp_mask)
119{ 119{
120 transaction_t *transaction; 120 transaction_t *transaction, *new_transaction = NULL;
121 int needed; 121 tid_t tid;
122 int nblocks = handle->h_buffer_credits; 122 int needed, need_to_start;
123 transaction_t *new_transaction = NULL; 123 int nblocks = handle->h_buffer_credits;
124 124
125 if (nblocks > journal->j_max_transaction_buffers) { 125 if (nblocks > journal->j_max_transaction_buffers) {
126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
222 atomic_sub(nblocks, &transaction->t_outstanding_credits); 222 atomic_sub(nblocks, &transaction->t_outstanding_credits);
223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
224 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
225 __jbd2_log_start_commit(journal, transaction->t_tid); 225 tid = transaction->t_tid;
226 need_to_start = !tid_geq(journal->j_commit_request, tid);
226 read_unlock(&journal->j_state_lock); 227 read_unlock(&journal->j_state_lock);
228 if (need_to_start)
229 jbd2_log_start_commit(journal, tid);
227 schedule(); 230 schedule();
228 finish_wait(&journal->j_wait_transaction_locked, &wait); 231 finish_wait(&journal->j_wait_transaction_locked, &wait);
229 goto repeat; 232 goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442{ 445{
443 transaction_t *transaction = handle->h_transaction; 446 transaction_t *transaction = handle->h_transaction;
444 journal_t *journal = transaction->t_journal; 447 journal_t *journal = transaction->t_journal;
445 int ret; 448 tid_t tid;
449 int need_to_start, ret;
446 450
447 /* If we've had an abort of any type, don't even think about 451 /* If we've had an abort of any type, don't even think about
448 * actually doing the restart! */ 452 * actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
465 spin_unlock(&transaction->t_handle_lock); 469 spin_unlock(&transaction->t_handle_lock);
466 470
467 jbd_debug(2, "restarting handle %p\n", handle); 471 jbd_debug(2, "restarting handle %p\n", handle);
468 __jbd2_log_start_commit(journal, transaction->t_tid); 472 tid = transaction->t_tid;
473 need_to_start = !tid_geq(journal->j_commit_request, tid);
469 read_unlock(&journal->j_state_lock); 474 read_unlock(&journal->j_state_lock);
475 if (need_to_start)
476 jbd2_log_start_commit(journal, tid);
470 477
471 lock_map_release(&handle->h_lockdep_map); 478 lock_map_release(&handle->h_lockdep_map);
472 handle->h_buffer_credits = nblocks; 479 handle->h_buffer_credits = nblocks;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 5f1bcb2f06f3..b7c99bfb3da6 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
520 struct nsm_handle *nsm, 520 struct nsm_handle *nsm,
521 const struct nlm_reboot *info) 521 const struct nlm_reboot *info)
522{ 522{
523 struct nlm_host *host = NULL; 523 struct nlm_host *host;
524 struct hlist_head *chain; 524 struct hlist_head *chain;
525 struct hlist_node *pos; 525 struct hlist_node *pos;
526 526
@@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
532 host->h_state++; 532 host->h_state++;
533 533
534 nlm_get_host(host); 534 nlm_get_host(host);
535 goto out; 535 mutex_unlock(&nlm_host_mutex);
536 return host;
536 } 537 }
537 } 538 }
538out: 539
539 mutex_unlock(&nlm_host_mutex); 540 mutex_unlock(&nlm_host_mutex);
540 return host; 541 return NULL;
541} 542}
542 543
543/** 544/**
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..0087cf9c2c6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
455 struct fs_struct *fs = current->fs; 455 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 456 struct dentry *parent = nd->path.dentry;
457 457
458 /*
459 * It can be possible to revalidate the dentry that we started
460 * the path walk with. force_reval_path may also revalidate the
461 * dentry already committed to the nameidata.
462 */
463 if (unlikely(parent == dentry))
464 return nameidata_drop_rcu(nd);
465
466 BUG_ON(!(nd->flags & LOOKUP_RCU)); 458 BUG_ON(!(nd->flags & LOOKUP_RCU));
467 if (nd->root.mnt) { 459 if (nd->root.mnt) {
468 spin_lock(&fs->lock); 460 spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
561 */ 553 */
562void release_open_intent(struct nameidata *nd) 554void release_open_intent(struct nameidata *nd)
563{ 555{
564 if (nd->intent.open.file->f_path.dentry == NULL) 556 struct file *file = nd->intent.open.file;
565 put_filp(nd->intent.open.file);
566 else
567 fput(nd->intent.open.file);
568}
569
570/*
571 * Call d_revalidate and handle filesystems that request rcu-walk
572 * to be dropped. This may be called and return in rcu-walk mode,
573 * regardless of success or error. If -ECHILD is returned, the caller
574 * must return -ECHILD back up the path walk stack so path walk may
575 * be restarted in ref-walk mode.
576 */
577static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
578{
579 int status;
580 557
581 status = dentry->d_op->d_revalidate(dentry, nd); 558 if (file && !IS_ERR(file)) {
582 if (status == -ECHILD) { 559 if (file->f_path.dentry == NULL)
583 if (nameidata_dentry_drop_rcu(nd, dentry)) 560 put_filp(file);
584 return status; 561 else
585 status = dentry->d_op->d_revalidate(dentry, nd); 562 fput(file);
586 } 563 }
564}
587 565
588 return status; 566static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
567{
568 return dentry->d_op->d_revalidate(dentry, nd);
589} 569}
590 570
591static inline struct dentry * 571static struct dentry *
592do_revalidate(struct dentry *dentry, struct nameidata *nd) 572do_revalidate(struct dentry *dentry, struct nameidata *nd)
593{ 573{
594 int status; 574 int status = d_revalidate(dentry, nd);
595
596 status = d_revalidate(dentry, nd);
597 if (unlikely(status <= 0)) { 575 if (unlikely(status <= 0)) {
598 /* 576 /*
599 * The dentry failed validation. 577 * The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
602 * to return a fail status. 580 * to return a fail status.
603 */ 581 */
604 if (status < 0) { 582 if (status < 0) {
605 /* If we're in rcu-walk, we don't have a ref */ 583 dput(dentry);
606 if (!(nd->flags & LOOKUP_RCU))
607 dput(dentry);
608 dentry = ERR_PTR(status); 584 dentry = ERR_PTR(status);
609 585 } else if (!d_invalidate(dentry)) {
610 } else { 586 dput(dentry);
611 /* Don't d_invalidate in rcu-walk mode */ 587 dentry = NULL;
612 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
613 return ERR_PTR(-ECHILD);
614 if (!d_invalidate(dentry)) {
615 dput(dentry);
616 dentry = NULL;
617 }
618 } 588 }
619 } 589 }
620 return dentry; 590 return dentry;
621} 591}
622 592
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
623static inline int need_reval_dot(struct dentry *dentry) 616static inline int need_reval_dot(struct dentry *dentry)
624{ 617{
625 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
664 return 0; 657 return 0;
665 658
666 if (!status) { 659 if (!status) {
667 /* Don't d_invalidate in rcu-walk mode */
668 if (nameidata_drop_rcu(nd))
669 return -ECHILD;
670 d_invalidate(dentry); 660 d_invalidate(dentry);
671 status = -ESTALE; 661 status = -ESTALE;
672 } 662 }
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
773 int error; 763 int error;
774 struct dentry *dentry = link->dentry; 764 struct dentry *dentry = link->dentry;
775 765
766 BUG_ON(nd->flags & LOOKUP_RCU);
767
776 touch_atime(link->mnt, dentry); 768 touch_atime(link->mnt, dentry);
777 nd_set_link(nd, NULL); 769 nd_set_link(nd, NULL);
778 770
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
803 * Without that kind of total limit, nasty chains of consecutive 795 * Without that kind of total limit, nasty chains of consecutive
804 * symlinks can cause almost arbitrarily long lookups. 796 * symlinks can cause almost arbitrarily long lookups.
805 */ 797 */
806static inline int do_follow_link(struct path *path, struct nameidata *nd) 798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
807{ 799{
808 void *cookie; 800 void *cookie;
809 int err = -ELOOP; 801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
810 if (current->link_count >= MAX_NESTED_LINKS) 808 if (current->link_count >= MAX_NESTED_LINKS)
811 goto loop; 809 goto loop;
812 if (current->total_link_count >= 40) 810 if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1251 return -ECHILD; 1249 return -ECHILD;
1252 1250
1253 nd->seq = seq; 1251 nd->seq = seq;
1254 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1255 goto need_revalidate; 1253 dentry = do_revalidate_rcu(dentry, nd);
1256done2: 1254 if (!dentry)
1255 goto need_lookup;
1256 if (IS_ERR(dentry))
1257 goto fail;
1258 if (!(nd->flags & LOOKUP_RCU))
1259 goto done;
1260 }
1257 path->mnt = mnt; 1261 path->mnt = mnt;
1258 path->dentry = dentry; 1262 path->dentry = dentry;
1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1263 if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
1266 if (!dentry) 1270 if (!dentry)
1267 goto need_lookup; 1271 goto need_lookup;
1268found: 1272found:
1269 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1270 goto need_revalidate; 1274 dentry = do_revalidate(dentry, nd);
1275 if (!dentry)
1276 goto need_lookup;
1277 if (IS_ERR(dentry))
1278 goto fail;
1279 }
1271done: 1280done:
1272 path->mnt = mnt; 1281 path->mnt = mnt;
1273 path->dentry = dentry; 1282 path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
1309 mutex_unlock(&dir->i_mutex); 1318 mutex_unlock(&dir->i_mutex);
1310 goto found; 1319 goto found;
1311 1320
1312need_revalidate:
1313 dentry = do_revalidate(dentry, nd);
1314 if (!dentry)
1315 goto need_lookup;
1316 if (IS_ERR(dentry))
1317 goto fail;
1318 if (nd->flags & LOOKUP_RCU)
1319 goto done2;
1320 goto done;
1321
1322fail: 1321fail:
1323 return PTR_ERR(dentry); 1322 return PTR_ERR(dentry);
1324} 1323}
@@ -1415,11 +1414,7 @@ exec_again:
1415 goto out_dput; 1414 goto out_dput;
1416 1415
1417 if (inode->i_op->follow_link) { 1416 if (inode->i_op->follow_link) {
1418 /* We commonly drop rcu-walk here */ 1417 err = do_follow_link(inode, &next, nd);
1419 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1420 return -ECHILD;
1421 BUG_ON(inode != next.dentry->d_inode);
1422 err = do_follow_link(&next, nd);
1423 if (err) 1418 if (err)
1424 goto return_err; 1419 goto return_err;
1425 nd->inode = nd->path.dentry->d_inode; 1420 nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
1463 break; 1458 break;
1464 if (inode && unlikely(inode->i_op->follow_link) && 1459 if (inode && unlikely(inode->i_op->follow_link) &&
1465 (lookup_flags & LOOKUP_FOLLOW)) { 1460 (lookup_flags & LOOKUP_FOLLOW)) {
1466 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) 1461 err = do_follow_link(inode, &next, nd);
1467 return -ECHILD;
1468 BUG_ON(inode != next.dentry->d_inode);
1469 err = do_follow_link(&next, nd);
1470 if (err) 1462 if (err)
1471 goto return_err; 1463 goto return_err;
1472 nd->inode = nd->path.dentry->d_inode; 1464 nd->inode = nd->path.dentry->d_inode;
@@ -1500,12 +1492,15 @@ return_reval:
1500 * We may need to check the cached dentry for staleness. 1492 * We may need to check the cached dentry for staleness.
1501 */ 1493 */
1502 if (need_reval_dot(nd->path.dentry)) { 1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1503 /* Note: we do not d_invalidate() */ 1497 /* Note: we do not d_invalidate() */
1504 err = d_revalidate(nd->path.dentry, nd); 1498 err = d_revalidate(nd->path.dentry, nd);
1505 if (!err) 1499 if (!err)
1506 err = -ESTALE; 1500 err = -ESTALE;
1507 if (err < 0) 1501 if (err < 0)
1508 break; 1502 break;
1503 return 0;
1509 } 1504 }
1510return_base: 1505return_base:
1511 if (nameidata_drop_rcu_last_maybe(nd)) 1506 if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2260,6 @@ static struct file *finish_open(struct nameidata *nd,
2265 return filp; 2260 return filp;
2266 2261
2267exit: 2262exit:
2268 if (!IS_ERR(nd->intent.open.file))
2269 release_open_intent(nd);
2270 path_put(&nd->path); 2263 path_put(&nd->path);
2271 return ERR_PTR(error); 2264 return ERR_PTR(error);
2272} 2265}
@@ -2389,8 +2382,6 @@ exit_mutex_unlock:
2389exit_dput: 2382exit_dput:
2390 path_put_conditional(path, nd); 2383 path_put_conditional(path, nd);
2391exit: 2384exit:
2392 if (!IS_ERR(nd->intent.open.file))
2393 release_open_intent(nd);
2394 path_put(&nd->path); 2385 path_put(&nd->path);
2395 return ERR_PTR(error); 2386 return ERR_PTR(error);
2396} 2387}
@@ -2477,6 +2468,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
2477 } 2468 }
2478 audit_inode(pathname, nd.path.dentry); 2469 audit_inode(pathname, nd.path.dentry);
2479 filp = finish_open(&nd, open_flag, acc_mode); 2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2480 return filp; 2472 return filp;
2481 2473
2482creat: 2474creat:
@@ -2553,6 +2545,7 @@ out:
2553 path_put(&nd.root); 2545 path_put(&nd.root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
2555 goto reval; 2547 goto reval;
2548 release_open_intent(&nd);
2556 return filp; 2549 return filp;
2557 2550
2558exit_dput: 2551exit_dput:
@@ -2560,8 +2553,6 @@ exit_dput:
2560out_path: 2553out_path:
2561 path_put(&nd.path); 2554 path_put(&nd.path);
2562out_filp: 2555out_filp:
2563 if (!IS_ERR(nd.intent.open.file))
2564 release_open_intent(&nd);
2565 filp = ERR_PTR(error); 2556 filp = ERR_PTR(error);
2566 goto out; 2557 goto out;
2567} 2558}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 199016528fcb..e3d294269058 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -135,33 +135,6 @@ out_err:
135 135
136#if defined(CONFIG_NFS_V4_1) 136#if defined(CONFIG_NFS_V4_1)
137/* 137/*
138 * * CB_SEQUENCE operations will fail until the callback sessionid is set.
139 * */
140int nfs4_set_callback_sessionid(struct nfs_client *clp)
141{
142 struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
143 struct nfs4_sessionid *bc_sid;
144
145 if (!serv->sv_bc_xprt)
146 return -EINVAL;
147
148 /* on success freed in xprt_free */
149 bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
150 if (!bc_sid)
151 return -ENOMEM;
152 memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
153 NFS4_MAX_SESSIONID_LEN);
154 spin_lock_bh(&serv->sv_cb_lock);
155 serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
156 spin_unlock_bh(&serv->sv_cb_lock);
157 dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
158 ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
159 ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
160 serv->sv_bc_xprt);
161 return 0;
162}
163
164/*
165 * The callback service for NFSv4.1 callbacks 138 * The callback service for NFSv4.1 callbacks
166 */ 139 */
167static int 140static int
@@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
266 struct nfs_callback_data *cb_info) 239 struct nfs_callback_data *cb_info)
267{ 240{
268} 241}
269int nfs4_set_callback_sessionid(struct nfs_client *clp)
270{
271 return 0;
272}
273#endif /* CONFIG_NFS_V4_1 */ 242#endif /* CONFIG_NFS_V4_1 */
274 243
275/* 244/*
@@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion)
359 mutex_unlock(&nfs_callback_mutex); 328 mutex_unlock(&nfs_callback_mutex);
360} 329}
361 330
362static int check_gss_callback_principal(struct nfs_client *clp, 331/* Boolean check of RPC_AUTH_GSS principal */
363 struct svc_rqst *rqstp) 332int
333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
364{ 334{
365 struct rpc_clnt *r = clp->cl_rpcclient; 335 struct rpc_clnt *r = clp->cl_rpcclient;
366 char *p = svc_gss_principal(rqstp); 336 char *p = svc_gss_principal(rqstp);
367 337
338 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
339 return 1;
340
368 /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ 341 /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
369 if (clp->cl_minorversion != 0) 342 if (clp->cl_minorversion != 0)
370 return SVC_DROP; 343 return 0;
371 /* 344 /*
372 * It might just be a normal user principal, in which case 345 * It might just be a normal user principal, in which case
373 * userspace won't bother to tell us the name at all. 346 * userspace won't bother to tell us the name at all.
374 */ 347 */
375 if (p == NULL) 348 if (p == NULL)
376 return SVC_DENIED; 349 return 0;
377 350
378 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ 351 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
379 352
380 if (memcmp(p, "nfs@", 4) != 0) 353 if (memcmp(p, "nfs@", 4) != 0)
381 return SVC_DENIED; 354 return 0;
382 p += 4; 355 p += 4;
383 if (strcmp(p, r->cl_server) != 0) 356 if (strcmp(p, r->cl_server) != 0)
384 return SVC_DENIED; 357 return 0;
385 return SVC_OK; 358 return 1;
386} 359}
387 360
388/* pg_authenticate method helper */ 361/*
389static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp) 362 * pg_authenticate method for nfsv4 callback threads.
390{ 363 *
391 struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp); 364 * The authflavor has been negotiated, so an incorrect flavor is a server
392 int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0; 365 * bug. Drop packets with incorrect authflavor.
393 366 *
394 dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc); 367 * All other checking done after NFS decoding where the nfs_client can be
395 if (svc_is_backchannel(rqstp)) 368 * found in nfs4_callback_compound
396 /* Sessionid (usually) set after CB_NULL ping */ 369 */
397 return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
398 is_cb_compound);
399 else
400 /* No callback identifier in pg_authenticate */
401 return nfs4_find_client_no_ident(svc_addr(rqstp));
402}
403
404/* pg_authenticate method for nfsv4 callback threads. */
405static int nfs_callback_authenticate(struct svc_rqst *rqstp) 370static int nfs_callback_authenticate(struct svc_rqst *rqstp)
406{ 371{
407 struct nfs_client *clp;
408 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
409 int ret = SVC_OK;
410
411 /* Don't talk to strangers */
412 clp = nfs_cb_find_client(rqstp);
413 if (clp == NULL)
414 return SVC_DROP;
415
416 dprintk("%s: %s NFSv4 callback!\n", __func__,
417 svc_print_addr(rqstp, buf, sizeof(buf)));
418
419 switch (rqstp->rq_authop->flavour) { 372 switch (rqstp->rq_authop->flavour) {
420 case RPC_AUTH_NULL: 373 case RPC_AUTH_NULL:
421 if (rqstp->rq_proc != CB_NULL) 374 if (rqstp->rq_proc != CB_NULL)
422 ret = SVC_DENIED; 375 return SVC_DROP;
423 break; 376 break;
424 case RPC_AUTH_UNIX: 377 case RPC_AUTH_GSS:
425 break; 378 /* No RPC_AUTH_GSS support yet in NFSv4.1 */
426 case RPC_AUTH_GSS: 379 if (svc_is_backchannel(rqstp))
427 ret = check_gss_callback_principal(clp, rqstp); 380 return SVC_DROP;
428 break;
429 default:
430 ret = SVC_DENIED;
431 } 381 }
432 nfs_put_client(clp); 382 return SVC_OK;
433 return ret;
434} 383}
435 384
436/* 385/*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d3b44f9bd747..46d93ce7311b 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -7,6 +7,7 @@
7 */ 7 */
8#ifndef __LINUX_FS_NFS_CALLBACK_H 8#ifndef __LINUX_FS_NFS_CALLBACK_H
9#define __LINUX_FS_NFS_CALLBACK_H 9#define __LINUX_FS_NFS_CALLBACK_H
10#include <linux/sunrpc/svc.h>
10 11
11#define NFS4_CALLBACK 0x40000000 12#define NFS4_CALLBACK 0x40000000
12#define NFS4_CALLBACK_XDRSIZE 2048 13#define NFS4_CALLBACK_XDRSIZE 2048
@@ -37,7 +38,6 @@ enum nfs4_callback_opnum {
37struct cb_process_state { 38struct cb_process_state {
38 __be32 drc_status; 39 __be32 drc_status;
39 struct nfs_client *clp; 40 struct nfs_client *clp;
40 struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
41}; 41};
42 42
43struct cb_compound_hdr_arg { 43struct cb_compound_hdr_arg {
@@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall(
168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); 168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
169extern void nfs4_cb_take_slot(struct nfs_client *clp); 169extern void nfs4_cb_take_slot(struct nfs_client *clp);
170#endif /* CONFIG_NFS_V4_1 */ 170#endif /* CONFIG_NFS_V4_1 */
171 171extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
172extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, 172extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
173 struct cb_getattrres *res, 173 struct cb_getattrres *res,
174 struct cb_process_state *cps); 174 struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4bb91cb2620d..89587573fe50 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
373{ 373{
374 struct nfs_client *clp; 374 struct nfs_client *clp;
375 int i; 375 int i;
376 __be32 status; 376 __be32 status = htonl(NFS4ERR_BADSESSION);
377 377
378 cps->clp = NULL; 378 cps->clp = NULL;
379 379
380 status = htonl(NFS4ERR_BADSESSION); 380 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
381 /* Incoming session must match the callback session */
382 if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
383 goto out;
384
385 clp = nfs4_find_client_sessionid(args->csa_addr,
386 &args->csa_sessionid, 1);
387 if (clp == NULL) 381 if (clp == NULL)
388 goto out; 382 goto out;
389 383
@@ -414,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
414 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 408 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
415 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 409 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
416 nfs4_cb_take_slot(clp); 410 nfs4_cb_take_slot(clp);
417 cps->clp = clp; /* put in nfs4_callback_compound */
418 411
419out: 412out:
413 cps->clp = clp; /* put in nfs4_callback_compound */
420 for (i = 0; i < args->csa_nrclists; i++) 414 for (i = 0; i < args->csa_nrclists; i++)
421 kfree(args->csa_rclists[i].rcl_refcalls); 415 kfree(args->csa_rclists[i].rcl_refcalls);
422 kfree(args->csa_rclists); 416 kfree(args->csa_rclists);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23112c263f81..14e0f9371d14 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
794 794
795 if (hdr_arg.minorversion == 0) { 795 if (hdr_arg.minorversion == 0) {
796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); 796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
797 if (!cps.clp) 797 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
798 return rpc_drop_reply; 798 return rpc_drop_reply;
799 } else 799 }
800 cps.svc_sid = bc_xprt_sid(rqstp);
801 800
802 hdr_res.taglen = hdr_arg.taglen; 801 hdr_res.taglen = hdr_arg.taglen;
803 hdr_res.tag = hdr_arg.tag; 802 hdr_res.tag = hdr_arg.tag;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 192f2f860265..bd3ca32879e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident)
1206 * For CB_COMPOUND calls, find a client by IP address, protocol version, 1206 * For CB_COMPOUND calls, find a client by IP address, protocol version,
1207 * minorversion, and sessionID 1207 * minorversion, and sessionID
1208 * 1208 *
1209 * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
1210 * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
1211 * can arrive before the callback sessionid is set. For CB_NULL calls,
1212 * find a client by IP address protocol version, and minorversion.
1213 *
1214 * Returns NULL if no such client 1209 * Returns NULL if no such client
1215 */ 1210 */
1216struct nfs_client * 1211struct nfs_client *
1217nfs4_find_client_sessionid(const struct sockaddr *addr, 1212nfs4_find_client_sessionid(const struct sockaddr *addr,
1218 struct nfs4_sessionid *sid, int is_cb_compound) 1213 struct nfs4_sessionid *sid)
1219{ 1214{
1220 struct nfs_client *clp; 1215 struct nfs_client *clp;
1221 1216
@@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1227 if (!nfs4_has_session(clp)) 1222 if (!nfs4_has_session(clp))
1228 continue; 1223 continue;
1229 1224
1230 /* Match sessionid unless cb_null call*/ 1225 /* Match sessionid*/
1231 if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data, 1226 if (memcmp(clp->cl_session->sess_id.data,
1232 sid->data, NFS4_MAX_SESSIONID_LEN) != 0)) 1227 sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
1233 continue; 1228 continue;
1234 1229
1235 atomic_inc(&clp->cl_count); 1230 atomic_inc(&clp->cl_count);
@@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1244 1239
1245struct nfs_client * 1240struct nfs_client *
1246nfs4_find_client_sessionid(const struct sockaddr *addr, 1241nfs4_find_client_sessionid(const struct sockaddr *addr,
1247 struct nfs4_sessionid *sid, int is_cb_compound) 1242 struct nfs4_sessionid *sid)
1248{ 1243{
1249 return NULL; 1244 return NULL;
1250} 1245}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 364e4328f392..bbbc6bf5cb2e 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -23,8 +23,6 @@
23 23
24static void nfs_do_free_delegation(struct nfs_delegation *delegation) 24static void nfs_do_free_delegation(struct nfs_delegation *delegation)
25{ 25{
26 if (delegation->cred)
27 put_rpccred(delegation->cred);
28 kfree(delegation); 26 kfree(delegation);
29} 27}
30 28
@@ -37,6 +35,10 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
37 35
38static void nfs_free_delegation(struct nfs_delegation *delegation) 36static void nfs_free_delegation(struct nfs_delegation *delegation)
39{ 37{
38 if (delegation->cred) {
39 put_rpccred(delegation->cred);
40 delegation->cred = NULL;
41 }
40 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 42 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
41} 43}
42 44
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6ace0d93c71..9943a75bb6d1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
407 pos += vec->iov_len; 407 pos += vec->iov_len;
408 } 408 }
409 409
410 /*
411 * If no bytes were started, return the error, and let the
412 * generic layer handle the completion.
413 */
414 if (requested_bytes == 0) {
415 nfs_direct_req_release(dreq);
416 return result < 0 ? result : -EIO;
417 }
418
410 if (put_dreq(dreq)) 419 if (put_dreq(dreq))
411 nfs_direct_complete(dreq); 420 nfs_direct_complete(dreq);
412 421 return 0;
413 if (requested_bytes != 0)
414 return 0;
415
416 if (result < 0)
417 return result;
418 return -EIO;
419} 422}
420 423
421static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, 424static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
@@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
841 pos += vec->iov_len; 844 pos += vec->iov_len;
842 } 845 }
843 846
847 /*
848 * If no bytes were started, return the error, and let the
849 * generic layer handle the completion.
850 */
851 if (requested_bytes == 0) {
852 nfs_direct_req_release(dreq);
853 return result < 0 ? result : -EIO;
854 }
855
844 if (put_dreq(dreq)) 856 if (put_dreq(dreq))
845 nfs_direct_write_complete(dreq, dreq->inode); 857 nfs_direct_write_complete(dreq, dreq->inode);
846 858 return 0;
847 if (requested_bytes != 0)
848 return 0;
849
850 if (result < 0)
851 return result;
852 return -EIO;
853} 859}
854 860
855static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, 861static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d8512423ba72..1cc600e77bb4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -881,9 +881,10 @@ out:
881 return ret; 881 return ret;
882} 882}
883 883
884static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 884static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
885{ 885{
886 struct nfs_inode *nfsi = NFS_I(inode); 886 struct nfs_inode *nfsi = NFS_I(inode);
887 unsigned long ret = 0;
887 888
888 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) 889 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
889 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) 890 && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
891 nfsi->change_attr = fattr->change_attr; 892 nfsi->change_attr = fattr->change_attr;
892 if (S_ISDIR(inode->i_mode)) 893 if (S_ISDIR(inode->i_mode))
893 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 894 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
895 ret |= NFS_INO_INVALID_ATTR;
894 } 896 }
895 /* If we have atomic WCC data, we may update some attributes */ 897 /* If we have atomic WCC data, we may update some attributes */
896 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) 898 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
897 && (fattr->valid & NFS_ATTR_FATTR_CTIME) 899 && (fattr->valid & NFS_ATTR_FATTR_CTIME)
898 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) 900 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
899 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 901 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
902 ret |= NFS_INO_INVALID_ATTR;
903 }
900 904
901 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) 905 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
902 && (fattr->valid & NFS_ATTR_FATTR_MTIME) 906 && (fattr->valid & NFS_ATTR_FATTR_MTIME)
903 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 907 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
904 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 908 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
905 if (S_ISDIR(inode->i_mode)) 909 if (S_ISDIR(inode->i_mode))
906 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 910 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
911 ret |= NFS_INO_INVALID_ATTR;
907 } 912 }
908 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 913 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
909 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 914 && (fattr->valid & NFS_ATTR_FATTR_SIZE)
910 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 915 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
911 && nfsi->npages == 0) 916 && nfsi->npages == 0) {
912 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 917 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
918 ret |= NFS_INO_INVALID_ATTR;
919 }
920 return ret;
913} 921}
914 922
915/** 923/**
@@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1223 | NFS_INO_REVAL_PAGECACHE); 1231 | NFS_INO_REVAL_PAGECACHE);
1224 1232
1225 /* Do atomic weak cache consistency updates */ 1233 /* Do atomic weak cache consistency updates */
1226 nfs_wcc_update_inode(inode, fattr); 1234 invalid |= nfs_wcc_update_inode(inode, fattr);
1227 1235
1228 /* More cache consistency checks */ 1236 /* More cache consistency checks */
1229 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { 1237 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4644f04b4b46..cf9fdbdabc67 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *);
133extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); 133extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
134extern struct nfs_client *nfs4_find_client_ident(int); 134extern struct nfs_client *nfs4_find_client_ident(int);
135extern struct nfs_client * 135extern struct nfs_client *
136nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *, 136nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
137 int);
138extern struct nfs_server *nfs_create_server( 137extern struct nfs_server *nfs_create_server(
139 const struct nfs_parsed_mount_data *, 138 const struct nfs_parsed_mount_data *,
140 struct nfs_fh *); 139 struct nfs_fh *);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9f88c5f4c7e2..274342771655 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
311 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 311 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
312 goto out; 312 goto out;
313 313
314 /* We are doing this here, because XDR marshalling can only 314 /* We are doing this here because XDR marshalling does not
315 return -ENOMEM. */ 315 * return any results, it BUGs. */
316 status = -ENOSPC; 316 status = -ENOSPC;
317 if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) 317 if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
318 goto out; 318 goto out;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 01c5e8b1941d..183c6b123d0f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
1328 1328
1329 encode_nfs_fh3(xdr, NFS_FH(args->inode)); 1329 encode_nfs_fh3(xdr, NFS_FH(args->inode));
1330 encode_uint32(xdr, args->mask); 1330 encode_uint32(xdr, args->mask);
1331
1332 base = req->rq_slen;
1331 if (args->npages != 0) 1333 if (args->npages != 0)
1332 xdr_write_pages(xdr, args->pages, 0, args->len); 1334 xdr_write_pages(xdr, args->pages, 0, args->len);
1335 else
1336 xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
1333 1337
1334 base = req->rq_slen;
1335 error = nfsacl_encode(xdr->buf, base, args->inode, 1338 error = nfsacl_encode(xdr->buf, base, args->inode,
1336 (args->mask & NFS_ACL) ? 1339 (args->mask & NFS_ACL) ?
1337 args->acl_access : NULL, 1, 0); 1340 args->acl_access : NULL, 1, 0);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 51fe64ace55a..f5c9b125e8cc 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
214 214
215 /* ipv6 length plus port is legal */ 215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) { 216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__, 217 dprintk("%s: Invalid address, length %d\n", __func__,
218 rlen); 218 rlen);
219 goto out_err; 219 goto out_err;
220 } 220 }
@@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
225 /* replace the port dots with dashes for the in4_pton() delimiter*/ 225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) { 226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.'); 227 char *res = strrchr(buf, '.');
228 if (!res) {
229 dprintk("%s: Failed finding expected dots in port\n",
230 __func__);
231 goto out_free;
232 }
228 *res = '-'; 233 *res = '-';
229 } 234 }
230 235
@@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
240 port = htons((tmp[0] << 8) | (tmp[1])); 245 port = htons((tmp[0] << 8) | (tmp[1]));
241 246
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port); 247 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf); 248 dprintk("%s: Decoded address and port %s\n", __func__, buf);
244out_free: 249out_free:
245 kfree(buf); 250 kfree(buf);
246out_err: 251out_err:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d992b0346e3..78936a8f40ab 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -50,6 +50,7 @@
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h>
53 54
54#include "nfs4_fs.h" 55#include "nfs4_fs.h"
55#include "delegation.h" 56#include "delegation.h"
@@ -4572,27 +4573,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4572 *p = htonl((u32)clp->cl_boot_time.tv_nsec); 4573 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
4573 args.verifier = &verifier; 4574 args.verifier = &verifier;
4574 4575
4575 while (1) { 4576 args.id_len = scnprintf(args.id, sizeof(args.id),
4576 args.id_len = scnprintf(args.id, sizeof(args.id), 4577 "%s/%s.%s/%u",
4577 "%s/%s %u", 4578 clp->cl_ipaddr,
4578 clp->cl_ipaddr, 4579 init_utsname()->nodename,
4579 rpc_peeraddr2str(clp->cl_rpcclient, 4580 init_utsname()->domainname,
4580 RPC_DISPLAY_ADDR), 4581 clp->cl_rpcclient->cl_auth->au_flavor);
4581 clp->cl_id_uniquifier);
4582
4583 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4584
4585 if (status != -NFS4ERR_CLID_INUSE)
4586 break;
4587
4588 if (signalled())
4589 break;
4590
4591 if (++clp->cl_id_uniquifier == 0)
4592 break;
4593 }
4594 4582
4595 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4583 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4584 if (!status)
4585 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4596 dprintk("<-- %s status= %d\n", __func__, status); 4586 dprintk("<-- %s status= %d\n", __func__, status);
4597 return status; 4587 return status;
4598} 4588}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2336d532cf66..e6742b57a04c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
232 status = nfs4_proc_create_session(clp); 232 status = nfs4_proc_create_session(clp);
233 if (status != 0) 233 if (status != 0)
234 goto out; 234 goto out;
235 status = nfs4_set_callback_sessionid(clp);
236 if (status != 0) {
237 printk(KERN_WARNING "Sessionid not set. No callback service\n");
238 nfs_callback_down(1);
239 status = 0;
240 }
241 nfs41_setup_state_renewal(clp); 235 nfs41_setup_state_renewal(clp);
242 nfs_mark_client_ready(clp, NFS_CS_READY); 236 nfs_mark_client_ready(clp, NFS_CS_READY);
243out: 237out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2ab8e5cb8f59..4e2c168b6ee9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6086,11 +6086,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6086 __be32 *p = xdr_inline_decode(xdr, 4); 6086 __be32 *p = xdr_inline_decode(xdr, 4);
6087 if (unlikely(!p)) 6087 if (unlikely(!p))
6088 goto out_overflow; 6088 goto out_overflow;
6089 if (!ntohl(*p++)) { 6089 if (*p == xdr_zero) {
6090 p = xdr_inline_decode(xdr, 4); 6090 p = xdr_inline_decode(xdr, 4);
6091 if (unlikely(!p)) 6091 if (unlikely(!p))
6092 goto out_overflow; 6092 goto out_overflow;
6093 if (!ntohl(*p++)) 6093 if (*p == xdr_zero)
6094 return -EAGAIN; 6094 return -EAGAIN;
6095 entry->eof = 1; 6095 entry->eof = 1;
6096 return -EBADCOOKIE; 6096 return -EBADCOOKIE;
@@ -6101,7 +6101,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6101 goto out_overflow; 6101 goto out_overflow;
6102 entry->prev_cookie = entry->cookie; 6102 entry->prev_cookie = entry->cookie;
6103 p = xdr_decode_hyper(p, &entry->cookie); 6103 p = xdr_decode_hyper(p, &entry->cookie);
6104 entry->len = ntohl(*p++); 6104 entry->len = be32_to_cpup(p);
6105 6105
6106 p = xdr_inline_decode(xdr, entry->len); 6106 p = xdr_inline_decode(xdr, entry->len);
6107 if (unlikely(!p)) 6107 if (unlikely(!p))
@@ -6132,9 +6132,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6132 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) 6132 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
6133 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 6133 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
6134 6134
6135 if (verify_attr_len(xdr, p, len) < 0)
6136 goto out_overflow;
6137
6138 return 0; 6135 return 0;
6139 6136
6140out_overflow: 6137out_overflow:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc4089769735..1b1bc1a0fb0a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 951{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
953 953
954 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); 954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
956 int i; 956 int i;
957 /* Verify cache is empty */ 957 /* Verify cache is empty */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 10d648ea128b..c8278f4046cb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,7 +932,7 @@ out_bad:
932 while (!list_empty(&list)) { 932 while (!list_empty(&list)) {
933 data = list_entry(list.next, struct nfs_write_data, pages); 933 data = list_entry(list.next, struct nfs_write_data, pages);
934 list_del(&data->pages); 934 list_del(&data->pages);
935 nfs_writedata_release(data); 935 nfs_writedata_free(data);
936 } 936 }
937 nfs_redirty_request(req); 937 nfs_redirty_request(req);
938 return -ENOMEM; 938 return -ENOMEM;
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index fc1c52571c03..84c27d69d421 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -42,6 +42,11 @@ struct nfsacl_encode_desc {
42 gid_t gid; 42 gid_t gid;
43}; 43};
44 44
45struct nfsacl_simple_acl {
46 struct posix_acl acl;
47 struct posix_acl_entry ace[4];
48};
49
45static int 50static int
46xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) 51xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
47{ 52{
@@ -72,9 +77,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
72 return 0; 77 return 0;
73} 78}
74 79
75unsigned int 80/**
76nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, 81 * nfsacl_encode - Encode an NFSv3 ACL
77 struct posix_acl *acl, int encode_entries, int typeflag) 82 *
83 * @buf: destination xdr_buf to contain XDR encoded ACL
84 * @base: byte offset in xdr_buf where XDR'd ACL begins
85 * @inode: inode of file whose ACL this is
86 * @acl: posix_acl to encode
87 * @encode_entries: whether to encode ACEs as well
88 * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
89 *
90 * Returns size of encoded ACL in bytes or a negative errno value.
91 */
92int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
93 struct posix_acl *acl, int encode_entries, int typeflag)
78{ 94{
79 int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; 95 int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
80 struct nfsacl_encode_desc nfsacl_desc = { 96 struct nfsacl_encode_desc nfsacl_desc = {
@@ -88,17 +104,22 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
88 .uid = inode->i_uid, 104 .uid = inode->i_uid,
89 .gid = inode->i_gid, 105 .gid = inode->i_gid,
90 }; 106 };
107 struct nfsacl_simple_acl aclbuf;
91 int err; 108 int err;
92 struct posix_acl *acl2 = NULL;
93 109
94 if (entries > NFS_ACL_MAX_ENTRIES || 110 if (entries > NFS_ACL_MAX_ENTRIES ||
95 xdr_encode_word(buf, base, entries)) 111 xdr_encode_word(buf, base, entries))
96 return -EINVAL; 112 return -EINVAL;
97 if (encode_entries && acl && acl->a_count == 3) { 113 if (encode_entries && acl && acl->a_count == 3) {
98 /* Fake up an ACL_MASK entry. */ 114 struct posix_acl *acl2 = &aclbuf.acl;
99 acl2 = posix_acl_alloc(4, GFP_KERNEL); 115
100 if (!acl2) 116 /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
101 return -ENOMEM; 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4);
122
102 /* Insert entries in canonical order: other orders seem 123 /* Insert entries in canonical order: other orders seem
103 to confuse Solaris VxFS. */ 124 to confuse Solaris VxFS. */
104 acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ 125 acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */
@@ -109,8 +130,6 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
109 nfsacl_desc.acl = acl2; 130 nfsacl_desc.acl = acl2;
110 } 131 }
111 err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); 132 err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
112 if (acl2)
113 posix_acl_release(acl2);
114 if (!err) 133 if (!err)
115 err = 8 + nfsacl_desc.desc.elem_size * 134 err = 8 + nfsacl_desc.desc.elem_size *
116 nfsacl_desc.desc.array_len; 135 nfsacl_desc.desc.array_len;
@@ -224,9 +243,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
224 return 0; 243 return 0;
225} 244}
226 245
227unsigned int 246/**
228nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, 247 * nfsacl_decode - Decode an NFSv3 ACL
229 struct posix_acl **pacl) 248 *
249 * @buf: xdr_buf containing XDR'd ACL data to decode
250 * @base: byte offset in xdr_buf where XDR'd ACL begins
251 * @aclcnt: count of ACEs in decoded posix_acl
252 * @pacl: buffer in which to place decoded posix_acl
253 *
254 * Returns the length of the decoded ACL in bytes, or a negative errno value.
255 */
256int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
257 struct posix_acl **pacl)
230{ 258{
231 struct nfsacl_decode_desc nfsacl_desc = { 259 struct nfsacl_decode_desc nfsacl_desc = {
232 .desc = { 260 .desc = {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e18919..cde36cb0f348 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
484out: 484out:
485 return status; 485 return status;
486out_default: 486out_default:
487 return nfs_cb_stat_to_errno(status); 487 return nfs_cb_stat_to_errno(nfserr);
488} 488}
489 489
490/* 490/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
564 if (unlikely(status)) 564 if (unlikely(status))
565 goto out; 565 goto out;
566 if (unlikely(nfserr != NFS4_OK)) 566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default; 567 status = nfs_cb_stat_to_errno(nfserr);
568out: 568out:
569 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(status);
572} 570}
573 571
574/* 572/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285d..54b60bfceb8d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
230 dp->dl_client = clp; 230 dp->dl_client = clp;
231 get_nfs4_file(fp); 231 get_nfs4_file(fp);
232 dp->dl_file = fp; 232 dp->dl_file = fp;
233 dp->dl_vfs_file = find_readable_file(fp);
234 get_file(dp->dl_vfs_file);
235 dp->dl_flock = NULL;
236 dp->dl_type = type; 233 dp->dl_type = type;
237 dp->dl_stateid.si_boot = boot_time; 234 dp->dl_stateid.si_boot = boot_time;
238 dp->dl_stateid.si_stateownerid = current_delegid++; 235 dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
241 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 238 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
242 dp->dl_time = 0; 239 dp->dl_time = 0;
243 atomic_set(&dp->dl_count, 1); 240 atomic_set(&dp->dl_count, 1);
244 list_add(&dp->dl_perfile, &fp->fi_delegations);
245 list_add(&dp->dl_perclnt, &clp->cl_delegations);
246 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); 241 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
247 return dp; 242 return dp;
248} 243}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
253 if (atomic_dec_and_test(&dp->dl_count)) { 248 if (atomic_dec_and_test(&dp->dl_count)) {
254 dprintk("NFSD: freeing dp %p\n",dp); 249 dprintk("NFSD: freeing dp %p\n",dp);
255 put_nfs4_file(dp->dl_file); 250 put_nfs4_file(dp->dl_file);
256 fput(dp->dl_vfs_file);
257 kmem_cache_free(deleg_slab, dp); 251 kmem_cache_free(deleg_slab, dp);
258 num_delegations--; 252 num_delegations--;
259 } 253 }
260} 254}
261 255
262/* Remove the associated file_lock first, then remove the delegation. 256static void nfs4_put_deleg_lease(struct nfs4_file *fp)
263 * lease_modify() is called to remove the FS_LEASE file_lock from
264 * the i_flock list, eventually calling nfsd's lock_manager
265 * fl_release_callback.
266 */
267static void
268nfs4_close_delegation(struct nfs4_delegation *dp)
269{ 257{
270 dprintk("NFSD: close_delegation dp %p\n",dp); 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
271 /* XXX: do we even need this check?: */ 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
272 if (dp->dl_flock) 260 fp->fi_lease = NULL;
273 vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); 261 fp->fi_deleg_file = NULL;
262 }
274} 263}
275 264
276/* Called under the state lock. */ 265/* Called under the state lock. */
277static void 266static void
278unhash_delegation(struct nfs4_delegation *dp) 267unhash_delegation(struct nfs4_delegation *dp)
279{ 268{
280 list_del_init(&dp->dl_perfile);
281 list_del_init(&dp->dl_perclnt); 269 list_del_init(&dp->dl_perclnt);
282 spin_lock(&recall_lock); 270 spin_lock(&recall_lock);
271 list_del_init(&dp->dl_perfile);
283 list_del_init(&dp->dl_recall_lru); 272 list_del_init(&dp->dl_recall_lru);
284 spin_unlock(&recall_lock); 273 spin_unlock(&recall_lock);
285 nfs4_close_delegation(dp); 274 nfs4_put_deleg_lease(dp->dl_file);
286 nfs4_put_delegation(dp); 275 nfs4_put_delegation(dp);
287} 276}
288 277
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
958 spin_lock(&recall_lock); 947 spin_lock(&recall_lock);
959 while (!list_empty(&clp->cl_delegations)) { 948 while (!list_empty(&clp->cl_delegations)) {
960 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 949 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
961 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
962 dp->dl_flock);
963 list_del_init(&dp->dl_perclnt); 950 list_del_init(&dp->dl_perclnt);
964 list_move(&dp->dl_recall_lru, &reaplist); 951 list_move(&dp->dl_recall_lru, &reaplist);
965 } 952 }
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
2078 fp->fi_inode = igrab(ino); 2065 fp->fi_inode = igrab(ino);
2079 fp->fi_id = current_fileid++; 2066 fp->fi_id = current_fileid++;
2080 fp->fi_had_conflict = false; 2067 fp->fi_had_conflict = false;
2068 fp->fi_lease = NULL;
2081 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 2069 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2082 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 2070 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2083 spin_lock(&recall_lock); 2071 spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2329 nfs4_file_put_access(fp, O_RDONLY); 2317 nfs4_file_put_access(fp, O_RDONLY);
2330} 2318}
2331 2319
2332/* 2320static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2333 * Spawn a thread to perform a recall on the delegation represented
2334 * by the lease (file_lock)
2335 *
2336 * Called from break_lease() with lock_flocks() held.
2337 * Note: we assume break_lease will only call this *once* for any given
2338 * lease.
2339 */
2340static
2341void nfsd_break_deleg_cb(struct file_lock *fl)
2342{ 2321{
2343 struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2344
2345 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2346 if (!dp)
2347 return;
2348
2349 /* We're assuming the state code never drops its reference 2322 /* We're assuming the state code never drops its reference
2350 * without first removing the lease. Since we're in this lease 2323 * without first removing the lease. Since we're in this lease
2351 * callback (and since the lease code is serialized by the kernel 2324 * callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2353 * it's safe to take a reference: */ 2326 * it's safe to take a reference: */
2354 atomic_inc(&dp->dl_count); 2327 atomic_inc(&dp->dl_count);
2355 2328
2356 spin_lock(&recall_lock);
2357 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2329 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2358 spin_unlock(&recall_lock);
2359 2330
2360 /* only place dl_time is set. protected by lock_flocks*/ 2331 /* only place dl_time is set. protected by lock_flocks*/
2361 dp->dl_time = get_seconds(); 2332 dp->dl_time = get_seconds();
2362 2333
2334 nfsd4_cb_recall(dp);
2335}
2336
2337/* Called from break_lease() with lock_flocks() held. */
2338static void nfsd_break_deleg_cb(struct file_lock *fl)
2339{
2340 struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
2341 struct nfs4_delegation *dp;
2342
2343 BUG_ON(!fp);
2344 /* We assume break_lease is only called once per lease: */
2345 BUG_ON(fp->fi_had_conflict);
2363 /* 2346 /*
2364 * We don't want the locks code to timeout the lease for us; 2347 * We don't want the locks code to timeout the lease for us;
2365 * we'll remove it ourself if the delegation isn't returned 2348 * we'll remove it ourself if a delegation isn't returned
2366 * in time. 2349 * in time:
2367 */ 2350 */
2368 fl->fl_break_time = 0; 2351 fl->fl_break_time = 0;
2369 2352
2370 dp->dl_file->fi_had_conflict = true; 2353 spin_lock(&recall_lock);
2371 nfsd4_cb_recall(dp); 2354 fp->fi_had_conflict = true;
2355 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2356 nfsd_break_one_deleg(dp);
2357 spin_unlock(&recall_lock);
2372} 2358}
2373 2359
2374static 2360static
@@ -2459,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2459static struct nfs4_delegation * 2445static struct nfs4_delegation *
2460find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2461{ 2447{
2462 struct nfs4_delegation *dp; 2448 struct nfs4_delegation *dp = NULL;
2463 2449
2450 spin_lock(&recall_lock);
2464 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
2465 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
2466 return dp; 2453 break;
2467 } 2454 }
2468 return NULL; 2455 spin_unlock(&recall_lock);
2456 return dp;
2469} 2457}
2470 2458
2471int share_access_to_flags(u32 share_access) 2459int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
2641 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 2629 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
2642} 2630}
2643 2631
2632static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
2633{
2634 struct file_lock *fl;
2635
2636 fl = locks_alloc_lock();
2637 if (!fl)
2638 return NULL;
2639 locks_init_lock(fl);
2640 fl->fl_lmops = &nfsd_lease_mng_ops;
2641 fl->fl_flags = FL_LEASE;
2642 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2643 fl->fl_end = OFFSET_MAX;
2644 fl->fl_owner = (fl_owner_t)(dp->dl_file);
2645 fl->fl_pid = current->tgid;
2646 return fl;
2647}
2648
2649static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2650{
2651 struct nfs4_file *fp = dp->dl_file;
2652 struct file_lock *fl;
2653 int status;
2654
2655 fl = nfs4_alloc_init_lease(dp, flag);
2656 if (!fl)
2657 return -ENOMEM;
2658 fl->fl_file = find_readable_file(fp);
2659 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2660 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
2661 if (status) {
2662 list_del_init(&dp->dl_perclnt);
2663 locks_free_lock(fl);
2664 return -ENOMEM;
2665 }
2666 fp->fi_lease = fl;
2667 fp->fi_deleg_file = fl->fl_file;
2668 get_file(fp->fi_deleg_file);
2669 atomic_set(&fp->fi_delegees, 1);
2670 list_add(&dp->dl_perfile, &fp->fi_delegations);
2671 return 0;
2672}
2673
2674static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2675{
2676 struct nfs4_file *fp = dp->dl_file;
2677
2678 if (!fp->fi_lease)
2679 return nfs4_setlease(dp, flag);
2680 spin_lock(&recall_lock);
2681 if (fp->fi_had_conflict) {
2682 spin_unlock(&recall_lock);
2683 return -EAGAIN;
2684 }
2685 atomic_inc(&fp->fi_delegees);
2686 list_add(&dp->dl_perfile, &fp->fi_delegations);
2687 spin_unlock(&recall_lock);
2688 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2689 return 0;
2690}
2691
2644/* 2692/*
2645 * Attempt to hand out a delegation. 2693 * Attempt to hand out a delegation.
2646 */ 2694 */
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2650 struct nfs4_delegation *dp; 2698 struct nfs4_delegation *dp;
2651 struct nfs4_stateowner *sop = stp->st_stateowner; 2699 struct nfs4_stateowner *sop = stp->st_stateowner;
2652 int cb_up; 2700 int cb_up;
2653 struct file_lock *fl;
2654 int status, flag = 0; 2701 int status, flag = 0;
2655 2702
2656 cb_up = nfsd4_cb_channel_good(sop->so_client); 2703 cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2681 } 2728 }
2682 2729
2683 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 2730 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2684 if (dp == NULL) { 2731 if (dp == NULL)
2685 flag = NFS4_OPEN_DELEGATE_NONE; 2732 goto out_no_deleg;
2686 goto out; 2733 status = nfs4_set_delegation(dp, flag);
2687 } 2734 if (status)
2688 status = -ENOMEM; 2735 goto out_free;
2689 fl = locks_alloc_lock();
2690 if (!fl)
2691 goto out;
2692 locks_init_lock(fl);
2693 fl->fl_lmops = &nfsd_lease_mng_ops;
2694 fl->fl_flags = FL_LEASE;
2695 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2696 fl->fl_end = OFFSET_MAX;
2697 fl->fl_owner = (fl_owner_t)dp;
2698 fl->fl_file = find_readable_file(stp->st_file);
2699 BUG_ON(!fl->fl_file);
2700 fl->fl_pid = current->tgid;
2701 dp->dl_flock = fl;
2702
2703 /* vfs_setlease checks to see if delegation should be handed out.
2704 * the lock_manager callback fl_change is used
2705 */
2706 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2707 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2708 dp->dl_flock = NULL;
2709 locks_free_lock(fl);
2710 unhash_delegation(dp);
2711 flag = NFS4_OPEN_DELEGATE_NONE;
2712 goto out;
2713 }
2714 2736
2715 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2737 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2716 2738
@@ -2722,6 +2744,12 @@ out:
2722 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) 2744 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2723 dprintk("NFSD: WARNING: refusing delegation reclaim\n"); 2745 dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2724 open->op_delegate_type = flag; 2746 open->op_delegate_type = flag;
2747 return;
2748out_free:
2749 nfs4_put_delegation(dp);
2750out_no_deleg:
2751 flag = NFS4_OPEN_DELEGATE_NONE;
2752 goto out;
2725} 2753}
2726 2754
2727/* 2755/*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
2916 test_val = u; 2944 test_val = u;
2917 break; 2945 break;
2918 } 2946 }
2919 dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
2920 dp, dp->dl_flock);
2921 list_move(&dp->dl_recall_lru, &reaplist); 2947 list_move(&dp->dl_recall_lru, &reaplist);
2922 } 2948 }
2923 spin_unlock(&recall_lock); 2949 spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3128 goto out; 3154 goto out;
3129 renew_client(dp->dl_client); 3155 renew_client(dp->dl_client);
3130 if (filpp) { 3156 if (filpp) {
3131 *filpp = find_readable_file(dp->dl_file); 3157 *filpp = dp->dl_file->fi_deleg_file;
3132 BUG_ON(!*filpp); 3158 BUG_ON(!*filpp);
3133 } 3159 }
3134 } else { /* open or lock stateid */ 3160 } else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..1275b8655070 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
317 READ_BUF(dummy32); 317 READ_BUF(dummy32);
318 len += (XDR_QUADLEN(dummy32) << 2); 318 len += (XDR_QUADLEN(dummy32) << 2);
319 READMEM(buf, dummy32); 319 READMEM(buf, dummy32);
320 if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) 320 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
321 goto out_nfserr; 321 return status;
322 iattr->ia_valid |= ATTR_UID; 322 iattr->ia_valid |= ATTR_UID;
323 } 323 }
324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { 324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
328 READ_BUF(dummy32); 328 READ_BUF(dummy32);
329 len += (XDR_QUADLEN(dummy32) << 2); 329 len += (XDR_QUADLEN(dummy32) << 2);
330 READMEM(buf, dummy32); 330 READMEM(buf, dummy32);
331 if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) 331 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
332 goto out_nfserr; 332 return status;
333 iattr->ia_valid |= ATTR_GID; 333 iattr->ia_valid |= ATTR_GID;
334 } 334 }
335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7bf..2d31224b07bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
83 atomic_t dl_count; /* ref count */ 83 atomic_t dl_count; /* ref count */
84 struct nfs4_client *dl_client; 84 struct nfs4_client *dl_client;
85 struct nfs4_file *dl_file; 85 struct nfs4_file *dl_file;
86 struct file *dl_vfs_file;
87 struct file_lock *dl_flock;
88 u32 dl_type; 86 u32 dl_type;
89 time_t dl_time; 87 time_t dl_time;
90/* For recall: */ 88/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
379 */ 377 */
380 atomic_t fi_readers; 378 atomic_t fi_readers;
381 atomic_t fi_writers; 379 atomic_t fi_writers;
380 struct file *fi_deleg_file;
381 struct file_lock *fi_lease;
382 atomic_t fi_delegees;
382 struct inode *fi_inode; 383 struct inode *fi_inode;
383 u32 fi_id; /* used with stateowner->so_id 384 u32 fi_id; /* used with stateowner->so_id
384 * for stateid_hashtbl hash */ 385 * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188d..da1d9701f8e4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
808 if (ra->p_count == 0) 808 if (ra->p_count == 0)
809 frap = rap; 809 frap = rap;
810 } 810 }
811 depth = nfsdstats.ra_size*11/10; 811 depth = nfsdstats.ra_size;
812 if (!frap) { 812 if (!frap) {
813 spin_unlock(&rab->pb_lock); 813 spin_unlock(&rab->pb_lock);
814 return NULL; 814 return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1744 host_err = nfsd_break_lease(odentry->d_inode); 1744 host_err = nfsd_break_lease(odentry->d_inode);
1745 if (host_err) 1745 if (host_err)
1746 goto out_drop_write; 1746 goto out_drop_write;
1747 if (ndentry->d_inode) {
1748 host_err = nfsd_break_lease(ndentry->d_inode);
1749 if (host_err)
1750 goto out_drop_write;
1751 }
1752 if (host_err)
1753 goto out_drop_write;
1747 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1748 if (!host_err) { 1755 if (!host_err) {
1749 host_err = commit_metadata(tfhp); 1756 host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1812 1819
1813 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1820 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1814 if (host_err) 1821 if (host_err)
1815 goto out_nfserr; 1822 goto out_put;
1816 1823
1817 host_err = nfsd_break_lease(rdentry->d_inode); 1824 host_err = nfsd_break_lease(rdentry->d_inode);
1818 if (host_err) 1825 if (host_err)
1819 goto out_put; 1826 goto out_drop_write;
1820 if (type != S_IFDIR) 1827 if (type != S_IFDIR)
1821 host_err = vfs_unlink(dirp, rdentry); 1828 host_err = vfs_unlink(dirp, rdentry);
1822 else 1829 else
1823 host_err = vfs_rmdir(dirp, rdentry); 1830 host_err = vfs_rmdir(dirp, rdentry);
1824out_put:
1825 dput(rdentry);
1826
1827 if (!host_err) 1831 if (!host_err)
1828 host_err = commit_metadata(fhp); 1832 host_err = commit_metadata(fhp);
1829 1833out_drop_write:
1830 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1834 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1835out_put:
1836 dput(rdentry);
1837
1831out_nfserr: 1838out_nfserr:
1832 err = nfserrno(host_err); 1839 err = nfserrno(host_err);
1833out: 1840out:
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0994f6a76c07..58fd707174e1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -704,7 +704,8 @@ skip_mount_setup:
704 sbp[0]->s_state = 704 sbp[0]->s_state =
705 cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); 705 cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
706 /* synchronize sbp[1] with sbp[0] */ 706 /* synchronize sbp[1] with sbp[0] */
707 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 707 if (sbp[1])
708 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
708 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 709 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
709} 710}
710 711
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b572b6727181..326e7475a22a 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2006 Anton Altaparmakov 4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -2576,6 +2576,8 @@ mft_rec_already_initialized:
2576 flush_dcache_page(page); 2576 flush_dcache_page(page);
2577 SetPageUptodate(page); 2577 SetPageUptodate(page);
2578 if (base_ni) { 2578 if (base_ni) {
2579 MFT_RECORD *m_tmp;
2580
2579 /* 2581 /*
2580 * Setup the base mft record in the extent mft record. This 2582 * Setup the base mft record in the extent mft record. This
2581 * completes initialization of the allocated extent mft record 2583 * completes initialization of the allocated extent mft record
@@ -2588,11 +2590,11 @@ mft_rec_already_initialized:
2588 * attach it to the base inode @base_ni and map, pin, and lock 2590 * attach it to the base inode @base_ni and map, pin, and lock
2589 * its, i.e. the allocated, mft record. 2591 * its, i.e. the allocated, mft record.
2590 */ 2592 */
2591 m = map_extent_mft_record(base_ni, bit, &ni); 2593 m_tmp = map_extent_mft_record(base_ni, bit, &ni);
2592 if (IS_ERR(m)) { 2594 if (IS_ERR(m_tmp)) {
2593 ntfs_error(vol->sb, "Failed to map allocated extent " 2595 ntfs_error(vol->sb, "Failed to map allocated extent "
2594 "mft record 0x%llx.", (long long)bit); 2596 "mft record 0x%llx.", (long long)bit);
2595 err = PTR_ERR(m); 2597 err = PTR_ERR(m_tmp);
2596 /* Set the mft record itself not in use. */ 2598 /* Set the mft record itself not in use. */
2597 m->flags &= cpu_to_le16( 2599 m->flags &= cpu_to_le16(
2598 ~le16_to_cpu(MFT_RECORD_IN_USE)); 2600 ~le16_to_cpu(MFT_RECORD_IN_USE));
@@ -2603,6 +2605,7 @@ mft_rec_already_initialized:
2603 ntfs_unmap_page(page); 2605 ntfs_unmap_page(page);
2604 goto undo_mftbmp_alloc; 2606 goto undo_mftbmp_alloc;
2605 } 2607 }
2608 BUG_ON(m != m_tmp);
2606 /* 2609 /*
2607 * Make sure the allocated mft record is written out to disk. 2610 * Make sure the allocated mft record is written out to disk.
2608 * No need to set the inode dirty because the caller is going 2611 * No need to set the inode dirty because the caller is going
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05b..5a2c6ebc22b5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
790 790
791 /* Pick up the filp from the open intent */ 791 /* Pick up the filp from the open intent */
792 filp = nd->intent.open.file; 792 filp = nd->intent.open.file;
793 nd->intent.open.file = NULL;
794
793 /* Has the filesystem initialised the file for us? */ 795 /* Has the filesystem initialised the file for us? */
794 if (filp->f_path.dentry == NULL) { 796 if (filp->f_path.dentry == NULL) {
795 path_get(&nd->path); 797 path_get(&nd->path);
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
29 29
30int mac_partition(struct parsed_partitions *state) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1;
33 Sector sect; 32 Sector sect;
34 unsigned char *data; 33 unsigned char *data;
35 int blk, blocks_in_map; 34 int slot, blocks_in_map;
36 unsigned secsize; 35 unsigned secsize;
37#ifdef CONFIG_PPC_PMAC 36#ifdef CONFIG_PPC_PMAC
38 int found_root = 0; 37 int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
59 put_dev_sector(sect); 58 put_dev_sector(sect);
60 return 0; /* not a MacOS disk */ 59 return 0; /* not a MacOS disk */
61 } 60 }
62 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
63 blocks_in_map = be32_to_cpu(part->map_count); 61 blocks_in_map = be32_to_cpu(part->map_count);
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
65 int pos = blk * secsize; 63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
66 put_dev_sector(sect); 69 put_dev_sector(sect);
67 data = read_part_sector(state, pos/512, &sect); 70 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 71 if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
113 } 116 }
114 117
115 if (goodness > found_root_goodness) { 118 if (goodness > found_root_goodness) {
116 found_root = blk; 119 found_root = slot;
117 found_root_goodness = goodness; 120 found_root_goodness = goodness;
118 } 121 }
119 } 122 }
120#endif /* CONFIG_PPC_PMAC */ 123#endif /* CONFIG_PPC_PMAC */
121
122 ++slot;
123 } 124 }
124#ifdef CONFIG_PPC_PMAC 125#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 126 if (found_root_goodness)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 39df95a0ec25..b1cf6bf4b41d 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/errno.h> 23#include <linux/errno.h>
24 24
25EXPORT_SYMBOL(posix_acl_init);
25EXPORT_SYMBOL(posix_acl_alloc); 26EXPORT_SYMBOL(posix_acl_alloc);
26EXPORT_SYMBOL(posix_acl_clone); 27EXPORT_SYMBOL(posix_acl_clone);
27EXPORT_SYMBOL(posix_acl_valid); 28EXPORT_SYMBOL(posix_acl_valid);
@@ -32,6 +33,16 @@ EXPORT_SYMBOL(posix_acl_chmod_masq);
32EXPORT_SYMBOL(posix_acl_permission); 33EXPORT_SYMBOL(posix_acl_permission);
33 34
34/* 35/*
36 * Init a fresh posix_acl
37 */
38void
39posix_acl_init(struct posix_acl *acl, int count)
40{
41 atomic_set(&acl->a_refcount, 1);
42 acl->a_count = count;
43}
44
45/*
35 * Allocate a new ACL with the specified number of entries. 46 * Allocate a new ACL with the specified number of entries.
36 */ 47 */
37struct posix_acl * 48struct posix_acl *
@@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags)
40 const size_t size = sizeof(struct posix_acl) + 51 const size_t size = sizeof(struct posix_acl) +
41 count * sizeof(struct posix_acl_entry); 52 count * sizeof(struct posix_acl_entry);
42 struct posix_acl *acl = kmalloc(size, flags); 53 struct posix_acl *acl = kmalloc(size, flags);
43 if (acl) { 54 if (acl)
44 atomic_set(&acl->a_refcount, 1); 55 posix_acl_init(acl, count);
45 acl->a_count = count;
46 }
47 return acl; 56 return acl;
48} 57}
49 58
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0f..7c99c1cf7e5c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
353 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task); 354 task_cpus_allowed(m, task);
355 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
356#if defined(CONFIG_S390)
357 task_show_regs(m, task);
358#endif
359 task_context_switch_counts(m, task); 356 task_context_switch_counts(m, task);
360 return 0; 357 return 0;
361} 358}
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index eafc22ab1fdd..b701eaa482bf 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
67 struct console *con; 67 struct console *con;
68 loff_t off = 0; 68 loff_t off = 0;
69 69
70 acquire_console_sem(); 70 console_lock();
71 for_each_console(con) 71 for_each_console(con)
72 if (off++ == *pos) 72 if (off++ == *pos)
73 break; 73 break;
@@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
84 84
85static void c_stop(struct seq_file *m, void *v) 85static void c_stop(struct seq_file *m, void *v)
86{ 86{
87 release_console_sem(); 87 console_unlock();
88} 88}
89 89
90static const struct seq_operations consoles_op = { 90static const struct seq_operations consoles_op = {
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2fb2882f0fa7..8ab48bc2fa7d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
63 *length = (unsigned char) bh->b_data[*offset] | 63 *length = (unsigned char) bh->b_data[*offset] |
64 (unsigned char) bh->b_data[*offset + 1] << 8; 64 (unsigned char) bh->b_data[*offset + 1] << 8;
65 *offset += 2; 65 *offset += 2;
66
67 if (*offset == msblk->devblksize) {
68 put_bh(bh);
69 bh = sb_bread(sb, ++(*cur_index));
70 if (bh == NULL)
71 return NULL;
72 *offset = 0;
73 }
66 } 74 }
67 75
68 return bh; 76 return bh;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 856756ca5ee4..c4eb40018256 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
95 if (!buffer_uptodate(bh[k])) 95 if (!buffer_uptodate(bh[k]))
96 goto release_mutex; 96 goto release_mutex;
97 97
98 if (avail == 0) {
99 offset = 0;
100 put_bh(bh[k++]);
101 continue;
102 }
103
104 stream->buf.in = bh[k]->b_data + offset; 98 stream->buf.in = bh[k]->b_data + offset;
105 stream->buf.in_size = avail; 99 stream->buf.in_size = avail;
106 stream->buf.in_pos = 0; 100 stream->buf.in_pos = 0;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 818a5e063faf..4661ae2b1cec 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
82 if (!buffer_uptodate(bh[k])) 82 if (!buffer_uptodate(bh[k]))
83 goto release_mutex; 83 goto release_mutex;
84 84
85 if (avail == 0) {
86 offset = 0;
87 put_bh(bh[k++]);
88 continue;
89 }
90
91 stream->next_in = bh[k]->b_data + offset; 85 stream->next_in = bh[k]->b_data + offset;
92 stream->avail_in = avail; 86 stream->avail_in = avail;
93 offset = 0; 87 offset = 0;
diff --git a/fs/super.c b/fs/super.c
index 74e149efed81..7e9dd4cc2c01 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
177 struct file_system_type *fs = s->s_type; 177 struct file_system_type *fs = s->s_type;
178 if (atomic_dec_and_test(&s->s_active)) { 178 if (atomic_dec_and_test(&s->s_active)) {
179 fs->kill_sb(s); 179 fs->kill_sb(s);
180 /*
181 * We need to call rcu_barrier so all the delayed rcu free
182 * inodes are flushed before we release the fs module.
183 */
184 rcu_barrier();
180 put_filesystem(fs); 185 put_filesystem(fs);
181 put_super(s); 186 put_super(s);
182 } else { 187 } else {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b06ede1d0bed..f5e2a19e0f8e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -985,10 +985,22 @@ xfs_ioctl_setattr(
985 985
986 /* 986 /*
987 * Extent size must be a multiple of the appropriate block 987 * Extent size must be a multiple of the appropriate block
988 * size, if set at all. 988 * size, if set at all. It must also be smaller than the
989 * maximum extent size supported by the filesystem.
990 *
991 * Also, for non-realtime files, limit the extent size hint to
992 * half the size of the AGs in the filesystem so alignment
993 * doesn't result in extents larger than an AG.
989 */ 994 */
990 if (fa->fsx_extsize != 0) { 995 if (fa->fsx_extsize != 0) {
991 xfs_extlen_t size; 996 xfs_extlen_t size;
997 xfs_fsblock_t extsize_fsb;
998
999 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1000 if (extsize_fsb > MAXEXTLEN) {
1001 code = XFS_ERROR(EINVAL);
1002 goto error_return;
1003 }
992 1004
993 if (XFS_IS_REALTIME_INODE(ip) || 1005 if (XFS_IS_REALTIME_INODE(ip) ||
994 ((mask & FSX_XFLAGS) && 1006 ((mask & FSX_XFLAGS) &&
@@ -997,6 +1009,10 @@ xfs_ioctl_setattr(
997 mp->m_sb.sb_blocklog; 1009 mp->m_sb.sb_blocklog;
998 } else { 1010 } else {
999 size = mp->m_sb.sb_blocksize; 1011 size = mp->m_sb.sb_blocksize;
1012 if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
1013 code = XFS_ERROR(EINVAL);
1014 goto error_return;
1015 }
1000 } 1016 }
1001 1017
1002 if (fa->fsx_extsize % size) { 1018 if (fa->fsx_extsize % size) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f8e854b4fde8..206a2815ced6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void)
1863 xfs_dquot_t *dqpout; 1863 xfs_dquot_t *dqpout;
1864 xfs_dquot_t *dqp; 1864 xfs_dquot_t *dqp;
1865 int restarts; 1865 int restarts;
1866 int startagain;
1866 1867
1867 restarts = 0; 1868 restarts = 0;
1868 dqpout = NULL; 1869 dqpout = NULL;
1869 1870
1870 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ 1871 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1871startagain: 1872again:
1873 startagain = 0;
1872 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1874 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1873 1875
1874 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { 1876 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
@@ -1885,13 +1887,10 @@ startagain:
1885 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 1887 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1886 1888
1887 trace_xfs_dqreclaim_want(dqp); 1889 trace_xfs_dqreclaim_want(dqp);
1888
1889 xfs_dqunlock(dqp);
1890 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1891 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1892 return NULL;
1893 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1890 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1894 goto startagain; 1891 restarts++;
1892 startagain = 1;
1893 goto dqunlock;
1895 } 1894 }
1896 1895
1897 /* 1896 /*
@@ -1906,23 +1905,20 @@ startagain:
1906 ASSERT(list_empty(&dqp->q_mplist)); 1905 ASSERT(list_empty(&dqp->q_mplist));
1907 list_del_init(&dqp->q_freelist); 1906 list_del_init(&dqp->q_freelist);
1908 xfs_Gqm->qm_dqfrlist_cnt--; 1907 xfs_Gqm->qm_dqfrlist_cnt--;
1909 xfs_dqunlock(dqp);
1910 dqpout = dqp; 1908 dqpout = dqp;
1911 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); 1909 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1912 break; 1910 goto dqunlock;
1913 } 1911 }
1914 1912
1915 ASSERT(dqp->q_hash); 1913 ASSERT(dqp->q_hash);
1916 ASSERT(!list_empty(&dqp->q_mplist)); 1914 ASSERT(!list_empty(&dqp->q_mplist));
1917 1915
1918 /* 1916 /*
1919 * Try to grab the flush lock. If this dquot is in the process of 1917 * Try to grab the flush lock. If this dquot is in the process
1920 * getting flushed to disk, we don't want to reclaim it. 1918 * of getting flushed to disk, we don't want to reclaim it.
1921 */ 1919 */
1922 if (!xfs_dqflock_nowait(dqp)) { 1920 if (!xfs_dqflock_nowait(dqp))
1923 xfs_dqunlock(dqp); 1921 goto dqunlock;
1924 continue;
1925 }
1926 1922
1927 /* 1923 /*
1928 * We have the flush lock so we know that this is not in the 1924 * We have the flush lock so we know that this is not in the
@@ -1944,8 +1940,7 @@ startagain:
1944 xfs_fs_cmn_err(CE_WARN, mp, 1940 xfs_fs_cmn_err(CE_WARN, mp,
1945 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1941 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
1946 } 1942 }
1947 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 1943 goto dqunlock;
1948 continue;
1949 } 1944 }
1950 1945
1951 /* 1946 /*
@@ -1967,13 +1962,8 @@ startagain:
1967 */ 1962 */
1968 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { 1963 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1969 restarts++; 1964 restarts++;
1970 mutex_unlock(&dqp->q_hash->qh_lock); 1965 startagain = 1;
1971 xfs_dqfunlock(dqp); 1966 goto qhunlock;
1972 xfs_dqunlock(dqp);
1973 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1974 if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
1975 return NULL;
1976 goto startagain;
1977 } 1967 }
1978 1968
1979 ASSERT(dqp->q_nrefs == 0); 1969 ASSERT(dqp->q_nrefs == 0);
@@ -1986,14 +1976,20 @@ startagain:
1986 xfs_Gqm->qm_dqfrlist_cnt--; 1976 xfs_Gqm->qm_dqfrlist_cnt--;
1987 dqpout = dqp; 1977 dqpout = dqp;
1988 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1978 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1979qhunlock:
1989 mutex_unlock(&dqp->q_hash->qh_lock); 1980 mutex_unlock(&dqp->q_hash->qh_lock);
1990dqfunlock: 1981dqfunlock:
1991 xfs_dqfunlock(dqp); 1982 xfs_dqfunlock(dqp);
1983dqunlock:
1992 xfs_dqunlock(dqp); 1984 xfs_dqunlock(dqp);
1993 if (dqpout) 1985 if (dqpout)
1994 break; 1986 break;
1995 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1987 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1996 return NULL; 1988 break;
1989 if (startagain) {
1990 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1991 goto again;
1992 }
1997 } 1993 }
1998 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1994 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1999 return dqpout; 1995 return dqpout;
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 0ab56b32c7eb..d0b3bc72005b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -75,6 +75,22 @@ typedef unsigned int xfs_alloctype_t;
75#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) 75#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
76 76
77/* 77/*
78 * When deciding how much space to allocate out of an AG, we limit the
79 * allocation maximum size to the size the AG. However, we cannot use all the
80 * blocks in the AG - some are permanently used by metadata. These
81 * blocks are generally:
82 * - the AG superblock, AGF, AGI and AGFL
83 * - the AGF (bno and cnt) and AGI btree root blocks
84 * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
85 *
86 * The AG headers are sector sized, so the amount of space they take up is
87 * dependent on filesystem geometry. The others are all single blocks.
88 */
89#define XFS_ALLOC_AG_MAX_USABLE(mp) \
90 ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
91
92
93/*
78 * Argument structure for xfs_alloc routines. 94 * Argument structure for xfs_alloc routines.
79 * This is turned into a structure to avoid having 20 arguments passed 95 * This is turned into a structure to avoid having 20 arguments passed
80 * down several levels of the stack. 96 * down several levels of the stack.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4111cd3966c7..dc3afd7739ff 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real(
1038 * Filling in the middle part of a previous delayed allocation. 1038 * Filling in the middle part of a previous delayed allocation.
1039 * Contiguity is impossible here. 1039 * Contiguity is impossible here.
1040 * This case is avoided almost all the time. 1040 * This case is avoided almost all the time.
1041 *
1042 * We start with a delayed allocation:
1043 *
1044 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1045 * PREV @ idx
1046 *
1047 * and we are allocating:
1048 * +rrrrrrrrrrrrrrrrr+
1049 * new
1050 *
1051 * and we set it up for insertion as:
1052 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1053 * new
1054 * PREV @ idx LEFT RIGHT
1055 * inserted at idx + 1
1041 */ 1056 */
1042 temp = new->br_startoff - PREV.br_startoff; 1057 temp = new->br_startoff - PREV.br_startoff;
1043 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1044 xfs_bmbt_set_blockcount(ep, temp);
1045 r[0] = *new;
1046 r[1].br_state = PREV.br_state;
1047 r[1].br_startblock = 0;
1048 r[1].br_startoff = new_endoff;
1049 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1058 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1050 r[1].br_blockcount = temp2; 1059 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1051 xfs_iext_insert(ip, idx + 1, 2, &r[0], state); 1060 xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */
1061 LEFT = *new;
1062 RIGHT.br_state = PREV.br_state;
1063 RIGHT.br_startblock = nullstartblock(
1064 (int)xfs_bmap_worst_indlen(ip, temp2));
1065 RIGHT.br_startoff = new_endoff;
1066 RIGHT.br_blockcount = temp2;
1067 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
1068 xfs_iext_insert(ip, idx + 1, 2, &LEFT, state);
1052 ip->i_df.if_lastex = idx + 1; 1069 ip->i_df.if_lastex = idx + 1;
1053 ip->i_d.di_nextents++; 1070 ip->i_d.di_nextents++;
1054 if (cur == NULL) 1071 if (cur == NULL)
@@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb(
2430 startag = ag = 0; 2447 startag = ag = 0;
2431 2448
2432 pag = xfs_perag_get(mp, ag); 2449 pag = xfs_perag_get(mp, ag);
2433 while (*blen < ap->alen) { 2450 while (*blen < args->maxlen) {
2434 if (!pag->pagf_init) { 2451 if (!pag->pagf_init) {
2435 error = xfs_alloc_pagf_init(mp, args->tp, ag, 2452 error = xfs_alloc_pagf_init(mp, args->tp, ag,
2436 XFS_ALLOC_FLAG_TRYLOCK); 2453 XFS_ALLOC_FLAG_TRYLOCK);
@@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb(
2452 notinit = 1; 2469 notinit = 1;
2453 2470
2454 if (xfs_inode_is_filestream(ap->ip)) { 2471 if (xfs_inode_is_filestream(ap->ip)) {
2455 if (*blen >= ap->alen) 2472 if (*blen >= args->maxlen)
2456 break; 2473 break;
2457 2474
2458 if (ap->userdata) { 2475 if (ap->userdata) {
@@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb(
2498 * If the best seen length is less than the request 2515 * If the best seen length is less than the request
2499 * length, use the best as the minimum. 2516 * length, use the best as the minimum.
2500 */ 2517 */
2501 else if (*blen < ap->alen) 2518 else if (*blen < args->maxlen)
2502 args->minlen = *blen; 2519 args->minlen = *blen;
2503 /* 2520 /*
2504 * Otherwise we've seen an extent as big as alen, 2521 * Otherwise we've seen an extent as big as maxlen,
2505 * use that as the minimum. 2522 * use that as the minimum.
2506 */ 2523 */
2507 else 2524 else
2508 args->minlen = ap->alen; 2525 args->minlen = args->maxlen;
2509 2526
2510 /* 2527 /*
2511 * set the failure fallback case to look in the selected 2528 * set the failure fallback case to look in the selected
@@ -2573,7 +2590,9 @@ xfs_bmap_btalloc(
2573 args.tp = ap->tp; 2590 args.tp = ap->tp;
2574 args.mp = mp; 2591 args.mp = mp;
2575 args.fsbno = ap->rval; 2592 args.fsbno = ap->rval;
2576 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2593
2594 /* Trim the allocation back to the maximum an AG can fit. */
2595 args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
2577 args.firstblock = ap->firstblock; 2596 args.firstblock = ap->firstblock;
2578 blen = 0; 2597 blen = 0;
2579 if (nullfb) { 2598 if (nullfb) {
@@ -2621,7 +2640,7 @@ xfs_bmap_btalloc(
2621 /* 2640 /*
2622 * Adjust for alignment 2641 * Adjust for alignment
2623 */ 2642 */
2624 if (blen > args.alignment && blen <= ap->alen) 2643 if (blen > args.alignment && blen <= args.maxlen)
2625 args.minlen = blen - args.alignment; 2644 args.minlen = blen - args.alignment;
2626 args.minalignslop = 0; 2645 args.minalignslop = 0;
2627 } else { 2646 } else {
@@ -2640,7 +2659,7 @@ xfs_bmap_btalloc(
2640 * of minlen+alignment+slop doesn't go up 2659 * of minlen+alignment+slop doesn't go up
2641 * between the calls. 2660 * between the calls.
2642 */ 2661 */
2643 if (blen > mp->m_dalign && blen <= ap->alen) 2662 if (blen > mp->m_dalign && blen <= args.maxlen)
2644 nextminlen = blen - mp->m_dalign; 2663 nextminlen = blen - mp->m_dalign;
2645 else 2664 else
2646 nextminlen = args.minlen; 2665 nextminlen = args.minlen;
@@ -4485,6 +4504,16 @@ xfs_bmapi(
4485 /* Figure out the extent size, adjust alen */ 4504 /* Figure out the extent size, adjust alen */
4486 extsz = xfs_get_extsz_hint(ip); 4505 extsz = xfs_get_extsz_hint(ip);
4487 if (extsz) { 4506 if (extsz) {
4507 /*
4508 * make sure we don't exceed a single
4509 * extent length when we align the
4510 * extent by reducing length we are
4511 * going to allocate by the maximum
4512 * amount extent size alignment may
4513 * require.
4514 */
4515 alen = XFS_FILBLKS_MIN(len,
4516 MAXEXTLEN - (2 * extsz - 1));
4488 error = xfs_bmap_extsize_align(mp, 4517 error = xfs_bmap_extsize_align(mp,
4489 &got, &prev, extsz, 4518 &got, &prev, extsz,
4490 rt, eof, 4519 rt, eof,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 98c6f73b6752..6f8c21ce0d6d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -427,13 +427,15 @@ xfs_buf_item_unpin(
427 427
428 if (remove) { 428 if (remove) {
429 /* 429 /*
430 * We have to remove the log item from the transaction 430 * If we are in a transaction context, we have to
431 * as we are about to release our reference to the 431 * remove the log item from the transaction as we are
432 * buffer. If we don't, the unlock that occurs later 432 * about to release our reference to the buffer. If we
433 * in xfs_trans_uncommit() will ry to reference the 433 * don't, the unlock that occurs later in
434 * xfs_trans_uncommit() will try to reference the
434 * buffer which we no longer have a hold on. 435 * buffer which we no longer have a hold on.
435 */ 436 */
436 xfs_trans_del_item(lip); 437 if (lip->li_desc)
438 xfs_trans_del_item(lip);
437 439
438 /* 440 /*
439 * Since the transaction no longer refers to the buffer, 441 * Since the transaction no longer refers to the buffer,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 75f2ef60e579..d22e62623437 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -138,7 +138,8 @@ xfs_efi_item_unpin(
138 138
139 if (remove) { 139 if (remove) {
140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); 140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
141 xfs_trans_del_item(lip); 141 if (lip->li_desc)
142 xfs_trans_del_item(lip);
142 xfs_efi_item_free(efip); 143 xfs_efi_item_free(efip);
143 return; 144 return;
144 } 145 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 55582bd66659..8a0f044750c3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -337,7 +337,12 @@ xfs_iomap_prealloc_size(
337 int shift = 0; 337 int shift = 0;
338 int64_t freesp; 338 int64_t freesp;
339 339
340 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); 340 /*
341 * rounddown_pow_of_two() returns an undefined result
342 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
343 * ensure we always pass in a non-zero value.
344 */
345 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
341 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 346 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
342 rounddown_pow_of_two(alloc_blocks)); 347 rounddown_pow_of_two(alloc_blocks));
343 348
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 916eb7db14d9..3bd3291ef8d2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); 192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193 193
194int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 195 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 196 xfs_lsn_t *commit_lsn, int flags);
197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9dc8125d04e5..9ca59be08977 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -543,7 +543,7 @@ xlog_cil_push(
543 543
544 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); 544 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
545 if (error) 545 if (error)
546 goto out_abort; 546 goto out_abort_free_ticket;
547 547
548 /* 548 /*
549 * now that we've written the checkpoint into the log, strictly 549 * now that we've written the checkpoint into the log, strictly
@@ -569,8 +569,9 @@ restart:
569 } 569 }
570 spin_unlock(&cil->xc_cil_lock); 570 spin_unlock(&cil->xc_cil_lock);
571 571
572 /* xfs_log_done always frees the ticket on error. */
572 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); 573 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
573 if (error || commit_lsn == -1) 574 if (commit_lsn == -1)
574 goto out_abort; 575 goto out_abort;
575 576
576 /* attach all the transactions w/ busy extents to iclog */ 577 /* attach all the transactions w/ busy extents to iclog */
@@ -600,6 +601,8 @@ out_free_ticket:
600 kmem_free(new_ctx); 601 kmem_free(new_ctx);
601 return 0; 602 return 0;
602 603
604out_abort_free_ticket:
605 xfs_log_ticket_put(tic);
603out_abort: 606out_abort:
604 xlog_cil_committed(ctx, XFS_LI_ABORTED); 607 xlog_cil_committed(ctx, XFS_LI_ABORTED);
605 return XFS_ERROR(EIO); 608 return XFS_ERROR(EIO);
@@ -622,7 +625,7 @@ out_abort:
622 * background commit, returns without it held once background commits are 625 * background commit, returns without it held once background commits are
623 * allowed again. 626 * allowed again.
624 */ 627 */
625int 628void
626xfs_log_commit_cil( 629xfs_log_commit_cil(
627 struct xfs_mount *mp, 630 struct xfs_mount *mp,
628 struct xfs_trans *tp, 631 struct xfs_trans *tp,
@@ -637,11 +640,6 @@ xfs_log_commit_cil(
637 if (flags & XFS_TRANS_RELEASE_LOG_RES) 640 if (flags & XFS_TRANS_RELEASE_LOG_RES)
638 log_flags = XFS_LOG_REL_PERM_RESERV; 641 log_flags = XFS_LOG_REL_PERM_RESERV;
639 642
640 if (XLOG_FORCED_SHUTDOWN(log)) {
641 xlog_cil_free_logvec(log_vector);
642 return XFS_ERROR(EIO);
643 }
644
645 /* 643 /*
646 * do all the hard work of formatting items (including memory 644 * do all the hard work of formatting items (including memory
647 * allocation) outside the CIL context lock. This prevents stalling CIL 645 * allocation) outside the CIL context lock. This prevents stalling CIL
@@ -701,7 +699,6 @@ xfs_log_commit_cil(
701 */ 699 */
702 if (push) 700 if (push)
703 xlog_cil_push(log, 0); 701 xlog_cil_push(log, 0);
704 return 0;
705} 702}
706 703
707/* 704/*
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 33dbc4e0ad62..76922793f64f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert(
1446 * Bulk operation version of xfs_trans_committed that takes a log vector of 1446 * Bulk operation version of xfs_trans_committed that takes a log vector of
1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to 1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to
1448 * minimise lock traffic. 1448 * minimise lock traffic.
1449 *
1450 * If we are called with the aborted flag set, it is because a log write during
1451 * a CIL checkpoint commit has failed. In this case, all the items in the
1452 * checkpoint have already gone through IOP_COMMITTED and IOP_UNLOCK, which
1453 * means that checkpoint commit abort handling is treated exactly the same
1454 * as an iclog write error even though we haven't started any IO yet. Hence in
1455 * this case all we need to do is IOP_COMMITTED processing, followed by an
1456 * IOP_UNPIN(aborted) call.
1449 */ 1457 */
1450void 1458void
1451xfs_trans_committed_bulk( 1459xfs_trans_committed_bulk(
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk(
1472 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 1480 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1473 continue; 1481 continue;
1474 1482
1483 /*
1484 * if we are aborting the operation, no point in inserting the
1485 * object into the AIL as we are in a shutdown situation.
1486 */
1487 if (aborted) {
1488 ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
1489 IOP_UNPIN(lip, 1);
1490 continue;
1491 }
1492
1475 if (item_lsn != commit_lsn) { 1493 if (item_lsn != commit_lsn) {
1476 1494
1477 /* 1495 /*
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk(
1503} 1521}
1504 1522
1505/* 1523/*
1506 * Called from the trans_commit code when we notice that 1524 * Called from the trans_commit code when we notice that the filesystem is in
1507 * the filesystem is in the middle of a forced shutdown. 1525 * the middle of a forced shutdown.
1526 *
1527 * When we are called here, we have already pinned all the items in the
1528 * transaction. However, neither IOP_COMMITTING nor IOP_UNLOCK has been called
1529 * so we can simply walk the items in the transaction, unpin them with an abort
1530 * flag and then free the items. Note that unpinning the items can result in
1531 * them being freed immediately, so we need to use a safe list traversal method
1532 * here.
1508 */ 1533 */
1509STATIC void 1534STATIC void
1510xfs_trans_uncommit( 1535xfs_trans_uncommit(
1511 struct xfs_trans *tp, 1536 struct xfs_trans *tp,
1512 uint flags) 1537 uint flags)
1513{ 1538{
1514 struct xfs_log_item_desc *lidp; 1539 struct xfs_log_item_desc *lidp, *n;
1515 1540
1516 list_for_each_entry(lidp, &tp->t_items, lid_trans) { 1541 list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
1517 /*
1518 * Unpin all but those that aren't dirty.
1519 */
1520 if (lidp->lid_flags & XFS_LID_DIRTY) 1542 if (lidp->lid_flags & XFS_LID_DIRTY)
1521 IOP_UNPIN(lidp->lid_item, 1); 1543 IOP_UNPIN(lidp->lid_item, 1);
1522 } 1544 }
@@ -1733,7 +1755,6 @@ xfs_trans_commit_cil(
1733 int flags) 1755 int flags)
1734{ 1756{
1735 struct xfs_log_vec *log_vector; 1757 struct xfs_log_vec *log_vector;
1736 int error;
1737 1758
1738 /* 1759 /*
1739 * Get each log item to allocate a vector structure for 1760 * Get each log item to allocate a vector structure for
@@ -1744,9 +1765,7 @@ xfs_trans_commit_cil(
1744 if (!log_vector) 1765 if (!log_vector)
1745 return ENOMEM; 1766 return ENOMEM;
1746 1767
1747 error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); 1768 xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1748 if (error)
1749 return error;
1750 1769
1751 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1770 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1752 xfs_trans_free(tp); 1771 xfs_trans_free(tp);