aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Trond Myklebust <trond.myklebust@primarydata.com> 2015-02-18 10:28:37 -0500
committer: Trond Myklebust <trond.myklebust@primarydata.com> 2015-02-18 10:28:37 -0500
commit: 65d2918e716afb89359cfa59734d76c1ff8700cb (patch)
tree: 4685404f96642243d62c3a1a823340913d087090 /fs
parent: bf40e5561fd288a505d5d8d8bf45eef96fe7253d (diff)
parent: 338d00cfef07d74a072f96821c64b20f98517d72 (diff)
Merge branch 'cleanups'
Merge cleanups requested by Linus.

* cleanups: (3 commits)
  pnfs: Refactor the *_layout_mark_request_commit to use pnfs_layout_mark_request_commit
  nfs: Can call nfs_clear_page_commit() instead
  nfs: Provide and use helper functions for marking a page as unstable
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/afs/rxrpc.c14
-rw-r--r--fs/afs/volume.c2
-rw-r--r--fs/aio.c20
-rw-r--r--fs/block_dev.c37
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/scrub.c4
-rw-r--r--fs/btrfs/super.c14
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-log.c1
-rw-r--r--fs/ceph/addr.c1
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/locks.c64
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/super.c20
-rw-r--r--fs/char_dev.c24
-rw-r--r--fs/cifs/cifs_debug.c6
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c41
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/ioctl.c21
-rw-r--r--fs/cifs/smbencrypt.c2
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/configfs/configfs_internal.h2
-rw-r--r--fs/configfs/inode.c17
-rw-r--r--fs/configfs/mount.c11
-rw-r--r--fs/dlm/netlink.c7
-rw-r--r--fs/ecryptfs/inode.c1
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/efivarfs/Kconfig1
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/ext4/super.c44
-rw-r--r--fs/f2fs/file.c1
-rw-r--r--fs/fs-writeback.c14
-rw-r--r--fs/fuse/file.c11
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/acl.c2
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/dir.c3
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/gfs2/glock.c14
-rw-r--r--fs/gfs2/inode.c3
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/quota.c49
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/gfs2/sys.c2
-rw-r--r--fs/hugetlbfs/inode.c13
-rw-r--r--fs/inode.c17
-rw-r--r--fs/ioctl.c5
-rw-r--r--fs/isofs/util.c18
-rw-r--r--fs/jfs/endian24.h49
-rw-r--r--fs/jfs/jfs_dtree.c4
-rw-r--r--fs/jfs/jfs_types.h55
-rw-r--r--fs/jfs/jfs_xtree.h25
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/kernfs/inode.c13
-rw-r--r--fs/kernfs/kernfs-internal.h1
-rw-r--r--fs/kernfs/mount.c1
-rw-r--r--fs/lockd/svclock.c4
-rw-r--r--fs/lockd/svcsubs.c26
-rw-r--r--fs/lockd/xdr.c8
-rw-r--r--fs/locks.c593
-rw-r--r--fs/ncpfs/inode.c3
-rw-r--r--fs/nfs/delegation.c23
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/filelayout/filelayout.c53
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c43
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/internal.h14
-rw-r--r--fs/nfs/nfs4state.c70
-rw-r--r--fs/nfs/nfs4super.c1
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/pnfs.h4
-rw-r--r--fs/nfs/pnfs_nfs.c30
-rw-r--r--fs/nfs/super.c24
-rw-r--r--fs/nfs/write.c59
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile8
-rw-r--r--fs/nfsd/blocklayout.c189
-rw-r--r--fs/nfsd/blocklayoutxdr.c157
-rw-r--r--fs/nfsd/blocklayoutxdr.h62
-rw-r--r--fs/nfsd/export.c8
-rw-r--r--fs/nfsd/export.h2
-rw-r--r--fs/nfsd/nfs4callback.c99
-rw-r--r--fs/nfsd/nfs4layouts.c721
-rw-r--r--fs/nfsd/nfs4proc.c310
-rw-r--r--fs/nfsd/nfs4state.c97
-rw-r--r--fs/nfsd/nfs4xdr.c362
-rw-r--r--fs/nfsd/nfsctl.c9
-rw-r--r--fs/nfsd/nfsd.h16
-rw-r--r--fs/nfsd/nfsfh.h18
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/nfsd/pnfs.h81
-rw-r--r--fs/nfsd/state.h43
-rw-r--r--fs/nfsd/trace.c5
-rw-r--r--fs/nfsd/trace.h54
-rw-r--r--fs/nfsd/xdr4.h59
-rw-r--r--fs/nfsd/xdr4cb.h7
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/gcinode.c1
-rw-r--r--fs/nilfs2/mdt.c6
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/page.c4
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/nilfs2/segment.c44
-rw-r--r--fs/nilfs2/segment.h5
-rw-r--r--fs/nilfs2/super.c6
-rw-r--r--fs/notify/Kconfig1
-rw-r--r--fs/notify/fanotify/fanotify.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c35
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/acl.c14
-rw-r--r--fs/ocfs2/alloc.c18
-rw-r--r--fs/ocfs2/cluster/tcp.c3
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h12
-rw-r--r--fs/ocfs2/dir.c10
-rw-r--r--fs/ocfs2/dlm/dlmast.c6
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c14
-rw-r--r--fs/ocfs2/dlm/dlmdomain.h1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c7
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/dlmglue.c3
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/journal.c1
-rw-r--r--fs/ocfs2/mmap.c1
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_local.c20
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/reservations.c2
-rw-r--r--fs/ocfs2/super.c49
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/proc/page.c16
-rw-r--r--fs/proc/task_mmu.c234
-rw-r--r--fs/pstore/Kconfig10
-rw-r--r--fs/pstore/Makefile2
-rw-r--r--fs/pstore/inode.c26
-rw-r--r--fs/pstore/internal.h6
-rw-r--r--fs/pstore/platform.c5
-rw-r--r--fs/pstore/pmsg.c114
-rw-r--r--fs/pstore/ram.c53
-rw-r--r--fs/quota/Kconfig1
-rw-r--r--fs/quota/dquot.c186
-rw-r--r--fs/quota/quota.c214
-rw-r--r--fs/quota/quota_v1.c4
-rw-r--r--fs/quota/quota_v2.c16
-rw-r--r--fs/ramfs/file-nommu.c7
-rw-r--r--fs/ramfs/inode.c21
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/romfs/mmap-nommu.c10
-rw-r--r--fs/romfs/super.c3
-rw-r--r--fs/super.c12
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/super.c5
-rw-r--r--fs/udf/Kconfig10
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c32
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/xfs/kmem.c10
-rw-r--r--fs/xfs/kmem.h5
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c20
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h33
-rw-r--r--fs/xfs/libxfs/xfs_format.h24
-rw-r--r--fs/xfs/libxfs/xfs_fs.h (renamed from fs/xfs/xfs_fs.h)0
-rw-r--r--fs/xfs/libxfs/xfs_sb.c320
-rw-r--r--fs/xfs/libxfs/xfs_sb.h11
-rw-r--r--fs/xfs/libxfs/xfs_shared.h33
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c14
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h1
-rw-r--r--fs/xfs/libxfs/xfs_types.h (renamed from fs/xfs/xfs_types.h)0
-rw-r--r--fs/xfs/xfs_aops.c149
-rw-r--r--fs/xfs/xfs_aops.h3
-rw-r--r--fs/xfs/xfs_bmap_util.h37
-rw-r--r--fs/xfs/xfs_buf_item.c6
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_file.c67
-rw-r--r--fs/xfs/xfs_fsops.c34
-rw-r--r--fs/xfs/xfs_inode.c136
-rw-r--r--fs/xfs/xfs_inode.h11
-rw-r--r--fs/xfs/xfs_ioctl.c501
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iomap.h2
-rw-r--r--fs/xfs/xfs_iops.c21
-rw-r--r--fs/xfs/xfs_log.c28
-rw-r--r--fs/xfs/xfs_mount.c107
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c43
-rw-r--r--fs/xfs/xfs_qm.h5
-rw-r--r--fs/xfs/xfs_qm_syscalls.c244
-rw-r--r--fs/xfs/xfs_quotaops.c67
-rw-r--r--fs/xfs/xfs_super.c20
-rw-r--r--fs/xfs/xfs_sysctl.c18
-rw-r--r--fs/xfs/xfs_trans.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c5
214 files changed, 4768 insertions, 2538 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6894b085f0ee..620d93489539 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -335,7 +335,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
335 } 335 }
336 init_rwsem(&v9ses->rename_sem); 336 init_rwsem(&v9ses->rename_sem);
337 337
338 rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY); 338 rc = bdi_setup_and_register(&v9ses->bdi, "9p");
339 if (rc) { 339 if (rc) {
340 kfree(v9ses->aname); 340 kfree(v9ses->aname);
341 kfree(v9ses->uname); 341 kfree(v9ses->uname);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 5594505e6e73..b40133796b87 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -831,7 +831,6 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
831 .fault = filemap_fault, 831 .fault = filemap_fault,
832 .map_pages = filemap_map_pages, 832 .map_pages = filemap_map_pages,
833 .page_mkwrite = v9fs_vm_page_mkwrite, 833 .page_mkwrite = v9fs_vm_page_mkwrite,
834 .remap_pages = generic_file_remap_pages,
835}; 834};
836 835
837static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { 836static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
@@ -839,7 +838,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
839 .fault = filemap_fault, 838 .fault = filemap_fault,
840 .map_pages = filemap_map_pages, 839 .map_pages = filemap_map_pages,
841 .page_mkwrite = v9fs_vm_page_mkwrite, 840 .page_mkwrite = v9fs_vm_page_mkwrite,
842 .remap_pages = generic_file_remap_pages,
843}; 841};
844 842
845 843
diff --git a/fs/Kconfig b/fs/Kconfig
index 664991afe0c0..a6bb530b1ec5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -165,6 +165,7 @@ config HUGETLB_PAGE
165 def_bool HUGETLBFS 165 def_bool HUGETLBFS
166 166
167source "fs/configfs/Kconfig" 167source "fs/configfs/Kconfig"
168source "fs/efivarfs/Kconfig"
168 169
169endmenu 170endmenu
170 171
@@ -209,7 +210,6 @@ source "fs/sysv/Kconfig"
209source "fs/ufs/Kconfig" 210source "fs/ufs/Kconfig"
210source "fs/exofs/Kconfig" 211source "fs/exofs/Kconfig"
211source "fs/f2fs/Kconfig" 212source "fs/f2fs/Kconfig"
212source "fs/efivarfs/Kconfig"
213 213
214endif # MISC_FILESYSTEMS 214endif # MISC_FILESYSTEMS
215 215
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 06e14bfb3496..dbc732e9a5c0 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
306 306
307 _debug("- range %u-%u%s", 307 _debug("- range %u-%u%s",
308 offset, to, msg->msg_flags ? " [more]" : ""); 308 offset, to, msg->msg_flags ? " [more]" : "");
309 iov_iter_init(&msg->msg_iter, WRITE, 309 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC,
310 (struct iovec *) iov, 1, to - offset); 310 iov, 1, to - offset);
311 311
312 /* have to change the state *before* sending the last 312 /* have to change the state *before* sending the last
313 * packet as RxRPC might give us the reply before it 313 * packet as RxRPC might give us the reply before it
@@ -384,7 +384,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
384 384
385 msg.msg_name = NULL; 385 msg.msg_name = NULL;
386 msg.msg_namelen = 0; 386 msg.msg_namelen = 0;
387 iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, 387 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
388 call->request_size); 388 call->request_size);
389 msg.msg_control = NULL; 389 msg.msg_control = NULL;
390 msg.msg_controllen = 0; 390 msg.msg_controllen = 0;
@@ -770,7 +770,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
770void afs_send_empty_reply(struct afs_call *call) 770void afs_send_empty_reply(struct afs_call *call)
771{ 771{
772 struct msghdr msg; 772 struct msghdr msg;
773 struct iovec iov[1]; 773 struct kvec iov[1];
774 774
775 _enter(""); 775 _enter("");
776 776
@@ -778,7 +778,7 @@ void afs_send_empty_reply(struct afs_call *call)
778 iov[0].iov_len = 0; 778 iov[0].iov_len = 0;
779 msg.msg_name = NULL; 779 msg.msg_name = NULL;
780 msg.msg_namelen = 0; 780 msg.msg_namelen = 0;
781 iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ 781 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */
782 msg.msg_control = NULL; 782 msg.msg_control = NULL;
783 msg.msg_controllen = 0; 783 msg.msg_controllen = 0;
784 msg.msg_flags = 0; 784 msg.msg_flags = 0;
@@ -805,7 +805,7 @@ void afs_send_empty_reply(struct afs_call *call)
805void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) 805void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
806{ 806{
807 struct msghdr msg; 807 struct msghdr msg;
808 struct iovec iov[1]; 808 struct kvec iov[1];
809 int n; 809 int n;
810 810
811 _enter(""); 811 _enter("");
@@ -814,7 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
814 iov[0].iov_len = len; 814 iov[0].iov_len = len;
815 msg.msg_name = NULL; 815 msg.msg_name = NULL;
816 msg.msg_namelen = 0; 816 msg.msg_namelen = 0;
817 iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); 817 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
818 msg.msg_control = NULL; 818 msg.msg_control = NULL;
819 msg.msg_controllen = 0; 819 msg.msg_controllen = 0;
820 msg.msg_flags = 0; 820 msg.msg_flags = 0;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 2b607257820c..d142a2449e65 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,7 +106,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
106 volume->cell = params->cell; 106 volume->cell = params->cell;
107 volume->vid = vlocation->vldb.vid[params->type]; 107 volume->vid = vlocation->vldb.vid[params->type];
108 108
109 ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY); 109 ret = bdi_setup_and_register(&volume->bdi, "afs");
110 if (ret) 110 if (ret)
111 goto error_bdi; 111 goto error_bdi;
112 112
diff --git a/fs/aio.c b/fs/aio.c
index 1b7893ecc296..118a2e0088d8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -165,15 +165,6 @@ static struct vfsmount *aio_mnt;
165static const struct file_operations aio_ring_fops; 165static const struct file_operations aio_ring_fops;
166static const struct address_space_operations aio_ctx_aops; 166static const struct address_space_operations aio_ctx_aops;
167 167
168/* Backing dev info for aio fs.
169 * -no dirty page accounting or writeback happens
170 */
171static struct backing_dev_info aio_fs_backing_dev_info = {
172 .name = "aiofs",
173 .state = 0,
174 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY,
175};
176
177static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) 168static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
178{ 169{
179 struct qstr this = QSTR_INIT("[aio]", 5); 170 struct qstr this = QSTR_INIT("[aio]", 5);
@@ -185,7 +176,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
185 176
186 inode->i_mapping->a_ops = &aio_ctx_aops; 177 inode->i_mapping->a_ops = &aio_ctx_aops;
187 inode->i_mapping->private_data = ctx; 178 inode->i_mapping->private_data = ctx;
188 inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info;
189 inode->i_size = PAGE_SIZE * nr_pages; 179 inode->i_size = PAGE_SIZE * nr_pages;
190 180
191 path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); 181 path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
@@ -230,9 +220,6 @@ static int __init aio_setup(void)
230 if (IS_ERR(aio_mnt)) 220 if (IS_ERR(aio_mnt))
231 panic("Failed to create aio fs mount."); 221 panic("Failed to create aio fs mount.");
232 222
233 if (bdi_init(&aio_fs_backing_dev_info))
234 panic("Failed to init aio fs backing dev info.");
235
236 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 223 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
237 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 224 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
238 225
@@ -1140,6 +1127,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
1140 long ret = 0; 1127 long ret = 0;
1141 int copy_ret; 1128 int copy_ret;
1142 1129
1130 /*
1131 * The mutex can block and wake us up and that will cause
1132 * wait_event_interruptible_hrtimeout() to schedule without sleeping
1133 * and repeat. This should be rare enough that it doesn't cause
1134 * performance issues. See the comment in read_events() for more detail.
1135 */
1136 sched_annotate_sleep();
1143 mutex_lock(&ctx->ring_lock); 1137 mutex_lock(&ctx->ring_lock);
1144 1138
1145 /* Access to ->ring_pages here is protected by ctx->ring_lock. */ 1139 /* Access to ->ring_pages here is protected by ctx->ring_lock. */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b48c41bf0f86..a9f92794d7a0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -49,23 +49,15 @@ inline struct block_device *I_BDEV(struct inode *inode)
49} 49}
50EXPORT_SYMBOL(I_BDEV); 50EXPORT_SYMBOL(I_BDEV);
51 51
52/* 52static void bdev_write_inode(struct inode *inode)
53 * Move the inode from its current bdi to a new bdi. Make sure the inode
54 * is clean before moving so that it doesn't linger on the old bdi.
55 */
56static void bdev_inode_switch_bdi(struct inode *inode,
57 struct backing_dev_info *dst)
58{ 53{
59 while (true) { 54 spin_lock(&inode->i_lock);
60 spin_lock(&inode->i_lock); 55 while (inode->i_state & I_DIRTY) {
61 if (!(inode->i_state & I_DIRTY)) {
62 inode->i_data.backing_dev_info = dst;
63 spin_unlock(&inode->i_lock);
64 return;
65 }
66 spin_unlock(&inode->i_lock); 56 spin_unlock(&inode->i_lock);
67 WARN_ON_ONCE(write_inode_now(inode, true)); 57 WARN_ON_ONCE(write_inode_now(inode, true));
58 spin_lock(&inode->i_lock);
68 } 59 }
60 spin_unlock(&inode->i_lock);
69} 61}
70 62
71/* Kill _all_ buffers and pagecache , dirty or not.. */ 63/* Kill _all_ buffers and pagecache , dirty or not.. */
@@ -584,7 +576,6 @@ struct block_device *bdget(dev_t dev)
584 inode->i_bdev = bdev; 576 inode->i_bdev = bdev;
585 inode->i_data.a_ops = &def_blk_aops; 577 inode->i_data.a_ops = &def_blk_aops;
586 mapping_set_gfp_mask(&inode->i_data, GFP_USER); 578 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
587 inode->i_data.backing_dev_info = &default_backing_dev_info;
588 spin_lock(&bdev_lock); 579 spin_lock(&bdev_lock);
589 list_add(&bdev->bd_list, &all_bdevs); 580 list_add(&bdev->bd_list, &all_bdevs);
590 spin_unlock(&bdev_lock); 581 spin_unlock(&bdev_lock);
@@ -1145,8 +1136,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1145 bdev->bd_queue = disk->queue; 1136 bdev->bd_queue = disk->queue;
1146 bdev->bd_contains = bdev; 1137 bdev->bd_contains = bdev;
1147 if (!partno) { 1138 if (!partno) {
1148 struct backing_dev_info *bdi;
1149
1150 ret = -ENXIO; 1139 ret = -ENXIO;
1151 bdev->bd_part = disk_get_part(disk, partno); 1140 bdev->bd_part = disk_get_part(disk, partno);
1152 if (!bdev->bd_part) 1141 if (!bdev->bd_part)
@@ -1172,11 +1161,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1172 } 1161 }
1173 } 1162 }
1174 1163
1175 if (!ret) { 1164 if (!ret)
1176 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1165 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1177 bdi = blk_get_backing_dev_info(bdev);
1178 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1179 }
1180 1166
1181 /* 1167 /*
1182 * If the device is invalidated, rescan partition 1168 * If the device is invalidated, rescan partition
@@ -1203,8 +1189,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1203 if (ret) 1189 if (ret)
1204 goto out_clear; 1190 goto out_clear;
1205 bdev->bd_contains = whole; 1191 bdev->bd_contains = whole;
1206 bdev_inode_switch_bdi(bdev->bd_inode,
1207 whole->bd_inode->i_data.backing_dev_info);
1208 bdev->bd_part = disk_get_part(disk, partno); 1192 bdev->bd_part = disk_get_part(disk, partno);
1209 if (!(disk->flags & GENHD_FL_UP) || 1193 if (!(disk->flags & GENHD_FL_UP) ||
1210 !bdev->bd_part || !bdev->bd_part->nr_sects) { 1194 !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1244,7 +1228,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1244 bdev->bd_disk = NULL; 1228 bdev->bd_disk = NULL;
1245 bdev->bd_part = NULL; 1229 bdev->bd_part = NULL;
1246 bdev->bd_queue = NULL; 1230 bdev->bd_queue = NULL;
1247 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1248 if (bdev != bdev->bd_contains) 1231 if (bdev != bdev->bd_contains)
1249 __blkdev_put(bdev->bd_contains, mode, 1); 1232 __blkdev_put(bdev->bd_contains, mode, 1);
1250 bdev->bd_contains = NULL; 1233 bdev->bd_contains = NULL;
@@ -1464,11 +1447,11 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1464 WARN_ON_ONCE(bdev->bd_holders); 1447 WARN_ON_ONCE(bdev->bd_holders);
1465 sync_blockdev(bdev); 1448 sync_blockdev(bdev);
1466 kill_bdev(bdev); 1449 kill_bdev(bdev);
1467 /* ->release can cause the old bdi to disappear, 1450 /*
1468 * so must switch it out first 1451 * ->release can cause the queue to disappear, so flush all
1452 * dirty data before.
1469 */ 1453 */
1470 bdev_inode_switch_bdi(bdev->bd_inode, 1454 bdev_write_inode(bdev->bd_inode);
1471 &default_backing_dev_info);
1472 } 1455 }
1473 if (bdev->bd_contains == bdev) { 1456 if (bdev->bd_contains == bdev) {
1474 if (disk->fops->release) 1457 if (disk->fops->release)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index a66768ebc8d1..80e9c18ea64f 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -8,6 +8,7 @@ config BTRFS_FS
8 select LZO_DECOMPRESS 8 select LZO_DECOMPRESS
9 select RAID6_PQ 9 select RAID6_PQ
10 select XOR_BLOCKS 10 select XOR_BLOCKS
11 select SRCU
11 12
12 help 13 help
13 Btrfs is a general purpose copy-on-write filesystem with extents, 14 Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7e607416755a..0b180708bf79 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1171,6 +1171,7 @@ struct btrfs_space_info {
1171 struct percpu_counter total_bytes_pinned; 1171 struct percpu_counter total_bytes_pinned;
1172 1172
1173 struct list_head list; 1173 struct list_head list;
1174 /* Protected by the spinlock 'lock'. */
1174 struct list_head ro_bgs; 1175 struct list_head ro_bgs;
1175 1176
1176 struct rw_semaphore groups_sem; 1177 struct rw_semaphore groups_sem;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8c63419a7f70..1afb18226da8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1715,12 +1715,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1715{ 1715{
1716 int err; 1716 int err;
1717 1717
1718 bdi->capabilities = BDI_CAP_MAP_COPY; 1718 err = bdi_setup_and_register(bdi, "btrfs");
1719 err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1720 if (err) 1719 if (err)
1721 return err; 1720 return err;
1722 1721
1723 bdi->ra_pages = default_backing_dev_info.ra_pages; 1722 bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
1724 bdi->congested_fn = btrfs_congested_fn; 1723 bdi->congested_fn = btrfs_congested_fn;
1725 bdi->congested_data = info; 1724 bdi->congested_data = info;
1726 return 0; 1725 return 0;
@@ -2319,7 +2318,6 @@ int open_ctree(struct super_block *sb,
2319 */ 2318 */
2320 fs_info->btree_inode->i_size = OFFSET_MAX; 2319 fs_info->btree_inode->i_size = OFFSET_MAX;
2321 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 2320 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2322 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
2323 2321
2324 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 2322 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2325 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 2323 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 15116585e714..a684086c3c81 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9422,7 +9422,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9422 * are still on the list after taking the semaphore 9422 * are still on the list after taking the semaphore
9423 */ 9423 */
9424 list_del_init(&block_group->list); 9424 list_del_init(&block_group->list);
9425 list_del_init(&block_group->ro_list);
9426 if (list_empty(&block_group->space_info->block_groups[index])) { 9425 if (list_empty(&block_group->space_info->block_groups[index])) {
9427 kobj = block_group->space_info->block_group_kobjs[index]; 9426 kobj = block_group->space_info->block_group_kobjs[index];
9428 block_group->space_info->block_group_kobjs[index] = NULL; 9427 block_group->space_info->block_group_kobjs[index] = NULL;
@@ -9464,6 +9463,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9464 btrfs_remove_free_space_cache(block_group); 9463 btrfs_remove_free_space_cache(block_group);
9465 9464
9466 spin_lock(&block_group->space_info->lock); 9465 spin_lock(&block_group->space_info->lock);
9466 list_del_init(&block_group->ro_list);
9467 block_group->space_info->total_bytes -= block_group->key.offset; 9467 block_group->space_info->total_bytes -= block_group->key.offset;
9468 block_group->space_info->bytes_readonly -= block_group->key.offset; 9468 block_group->space_info->bytes_readonly -= block_group->key.offset;
9469 block_group->space_info->disk_total -= block_group->key.offset * factor; 9469 block_group->space_info->disk_total -= block_group->key.offset * factor;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ebabd237153..c73df6a7c9b6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1407,8 +1407,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1407 while (index <= end_index) { 1407 while (index <= end_index) {
1408 page = find_get_page(inode->i_mapping, index); 1408 page = find_get_page(inode->i_mapping, index);
1409 BUG_ON(!page); /* Pages should be in the extent_io_tree */ 1409 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1410 account_page_redirty(page);
1411 __set_page_dirty_nobuffers(page); 1410 __set_page_dirty_nobuffers(page);
1411 account_page_redirty(page);
1412 page_cache_release(page); 1412 page_cache_release(page);
1413 index++; 1413 index++;
1414 } 1414 }
@@ -2190,7 +2190,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2190 2190
2191 next = next_state(state); 2191 next = next_state(state);
2192 2192
2193 failrec = (struct io_failure_record *)state->private; 2193 failrec = (struct io_failure_record *)(unsigned long)state->private;
2194 free_extent_state(state); 2194 free_extent_state(state);
2195 kfree(failrec); 2195 kfree(failrec);
2196 2196
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e4090259569b..b78bbbac900d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1746,7 +1746,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1746 1746
1747 mutex_lock(&inode->i_mutex); 1747 mutex_lock(&inode->i_mutex);
1748 1748
1749 current->backing_dev_info = inode->i_mapping->backing_dev_info; 1749 current->backing_dev_info = inode_to_bdi(inode);
1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1751 if (err) { 1751 if (err) {
1752 mutex_unlock(&inode->i_mutex); 1752 mutex_unlock(&inode->i_mutex);
@@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
2081 .fault = filemap_fault, 2081 .fault = filemap_fault,
2082 .map_pages = filemap_map_pages, 2082 .map_pages = filemap_map_pages,
2083 .page_mkwrite = btrfs_page_mkwrite, 2083 .page_mkwrite = btrfs_page_mkwrite,
2084 .remap_pages = generic_file_remap_pages,
2085}; 2084};
2086 2085
2087static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 2086static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8bf326affb94..54bcf639d1cf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3608,7 +3608,6 @@ cache_acl:
3608 switch (inode->i_mode & S_IFMT) { 3608 switch (inode->i_mode & S_IFMT) {
3609 case S_IFREG: 3609 case S_IFREG:
3610 inode->i_mapping->a_ops = &btrfs_aops; 3610 inode->i_mapping->a_ops = &btrfs_aops;
3611 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3612 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 3611 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3613 inode->i_fop = &btrfs_file_operations; 3612 inode->i_fop = &btrfs_file_operations;
3614 inode->i_op = &btrfs_file_inode_operations; 3613 inode->i_op = &btrfs_file_inode_operations;
@@ -3623,7 +3622,6 @@ cache_acl:
3623 case S_IFLNK: 3622 case S_IFLNK:
3624 inode->i_op = &btrfs_symlink_inode_operations; 3623 inode->i_op = &btrfs_symlink_inode_operations;
3625 inode->i_mapping->a_ops = &btrfs_symlink_aops; 3624 inode->i_mapping->a_ops = &btrfs_symlink_aops;
3626 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3627 break; 3625 break;
3628 default: 3626 default:
3629 inode->i_op = &btrfs_special_inode_operations; 3627 inode->i_op = &btrfs_special_inode_operations;
@@ -6088,7 +6086,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
6088 inode->i_fop = &btrfs_file_operations; 6086 inode->i_fop = &btrfs_file_operations;
6089 inode->i_op = &btrfs_file_inode_operations; 6087 inode->i_op = &btrfs_file_inode_operations;
6090 inode->i_mapping->a_ops = &btrfs_aops; 6088 inode->i_mapping->a_ops = &btrfs_aops;
6091 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
6092 6089
6093 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 6090 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6094 if (err) 6091 if (err)
@@ -9203,7 +9200,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9203 inode->i_fop = &btrfs_file_operations; 9200 inode->i_fop = &btrfs_file_operations;
9204 inode->i_op = &btrfs_file_inode_operations; 9201 inode->i_op = &btrfs_file_inode_operations;
9205 inode->i_mapping->a_ops = &btrfs_aops; 9202 inode->i_mapping->a_ops = &btrfs_aops;
9206 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9207 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 9203 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9208 9204
9209 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 9205 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
@@ -9247,7 +9243,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9247 9243
9248 inode->i_op = &btrfs_symlink_inode_operations; 9244 inode->i_op = &btrfs_symlink_inode_operations;
9249 inode->i_mapping->a_ops = &btrfs_symlink_aops; 9245 inode->i_mapping->a_ops = &btrfs_symlink_aops;
9250 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9251 inode_set_bytes(inode, name_len); 9246 inode_set_bytes(inode, name_len);
9252 btrfs_i_size_write(inode, name_len); 9247 btrfs_i_size_write(inode, name_len);
9253 err = btrfs_update_inode(trans, root, inode); 9248 err = btrfs_update_inode(trans, root, inode);
@@ -9459,7 +9454,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
9459 inode->i_op = &btrfs_file_inode_operations; 9454 inode->i_op = &btrfs_file_inode_operations;
9460 9455
9461 inode->i_mapping->a_ops = &btrfs_aops; 9456 inode->i_mapping->a_ops = &btrfs_aops;
9462 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9463 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 9457 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9464 9458
9465 ret = btrfs_init_inode_security(trans, inode, dir, NULL); 9459 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9e1569ffbf6e..e427cb7ee12c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3053,7 +3053,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3053 3053
3054 ppath = btrfs_alloc_path(); 3054 ppath = btrfs_alloc_path();
3055 if (!ppath) { 3055 if (!ppath) {
3056 btrfs_free_path(ppath); 3056 btrfs_free_path(path);
3057 return -ENOMEM; 3057 return -ENOMEM;
3058 } 3058 }
3059 3059
@@ -3065,6 +3065,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3065 path->search_commit_root = 1; 3065 path->search_commit_root = 1;
3066 path->skip_locking = 1; 3066 path->skip_locking = 1;
3067 3067
3068 ppath->search_commit_root = 1;
3069 ppath->skip_locking = 1;
3068 /* 3070 /*
3069 * trigger the readahead for extent tree csum tree and wait for 3071 * trigger the readahead for extent tree csum tree and wait for
3070 * completion. During readahead, the scrub is officially paused 3072 * completion. During readahead, the scrub is officially paused
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 60f7cbe815e9..6f49b2872a64 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1000,10 +1000,20 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
1000 */ 1000 */
1001 if (fs_info->pending_changes == 0) 1001 if (fs_info->pending_changes == 0)
1002 return 0; 1002 return 0;
1003 /*
1004 * A non-blocking test if the fs is frozen. We must not
1005 * start a new transaction here otherwise a deadlock
1006 * happens. The pending operations are delayed to the
1007 * next commit after thawing.
1008 */
1009 if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
1010 __sb_end_write(sb, SB_FREEZE_WRITE);
1011 else
1012 return 0;
1003 trans = btrfs_start_transaction(root, 0); 1013 trans = btrfs_start_transaction(root, 0);
1004 } else {
1005 return PTR_ERR(trans);
1006 } 1014 }
1015 if (IS_ERR(trans))
1016 return PTR_ERR(trans);
1007 } 1017 }
1008 return btrfs_commit_transaction(trans, root); 1018 return btrfs_commit_transaction(trans, root);
1009} 1019}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a605d4e2f2bc..e88b59d13439 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2118,7 +2118,7 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
2118 unsigned long prev; 2118 unsigned long prev;
2119 unsigned long bit; 2119 unsigned long bit;
2120 2120
2121 prev = cmpxchg(&fs_info->pending_changes, 0, 0); 2121 prev = xchg(&fs_info->pending_changes, 0);
2122 if (!prev) 2122 if (!prev)
2123 return; 2123 return;
2124 2124
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a02da16f2be..1a9585d4380a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2591,6 +2591,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2591 } 2591 }
2592 2592
2593 if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { 2593 if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
2594 blk_finish_plug(&plug);
2594 mutex_unlock(&log_root_tree->log_mutex); 2595 mutex_unlock(&log_root_tree->log_mutex);
2595 ret = root_log_ctx.log_ret; 2596 ret = root_log_ctx.log_ret;
2596 goto out; 2597 goto out;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c81c0e004588..24be059fd1f8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1569,7 +1569,6 @@ out:
1569static struct vm_operations_struct ceph_vmops = { 1569static struct vm_operations_struct ceph_vmops = {
1570 .fault = ceph_filemap_fault, 1570 .fault = ceph_filemap_fault,
1571 .page_mkwrite = ceph_page_mkwrite, 1571 .page_mkwrite = ceph_page_mkwrite,
1572 .remap_pages = generic_file_remap_pages,
1573}; 1572};
1574 1573
1575int ceph_mmap(struct file *file, struct vm_area_struct *vma) 1574int ceph_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce74b394b49d..905986dd4c3c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -945,7 +945,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
945 mutex_lock(&inode->i_mutex); 945 mutex_lock(&inode->i_mutex);
946 946
947 /* We can write back this queue in page reclaim */ 947 /* We can write back this queue in page reclaim */
948 current->backing_dev_info = file->f_mapping->backing_dev_info; 948 current->backing_dev_info = inode_to_bdi(inode);
949 949
950 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 950 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
951 if (err) 951 if (err)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f61a74115beb..6b5173605154 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -783,8 +783,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
783 } 783 }
784 784
785 inode->i_mapping->a_ops = &ceph_aops; 785 inode->i_mapping->a_ops = &ceph_aops;
786 inode->i_mapping->backing_dev_info =
787 &ceph_sb_to_client(inode->i_sb)->backing_dev_info;
788 786
789 switch (inode->i_mode & S_IFMT) { 787 switch (inode->i_mode & S_IFMT) {
790 case S_IFIFO: 788 case S_IFIFO:
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index c35c5c614e38..06ea5cd05cd9 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -239,23 +239,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
239 return err; 239 return err;
240} 240}
241 241
242/** 242/*
243 * Must be called with lock_flocks() already held. Fills in the passed 243 * Fills in the passed counter variables, so you can prepare pagelist metadata
244 * counter variables, so you can prepare pagelist metadata before calling 244 * before calling ceph_encode_locks.
245 * ceph_encode_locks.
246 */ 245 */
247void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) 246void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
248{ 247{
249 struct file_lock *lock; 248 struct file_lock_context *ctx;
250 249
251 *fcntl_count = 0; 250 *fcntl_count = 0;
252 *flock_count = 0; 251 *flock_count = 0;
253 252
254 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 253 ctx = inode->i_flctx;
255 if (lock->fl_flags & FL_POSIX) 254 if (ctx) {
256 ++(*fcntl_count); 255 *fcntl_count = ctx->flc_posix_cnt;
257 else if (lock->fl_flags & FL_FLOCK) 256 *flock_count = ctx->flc_flock_cnt;
258 ++(*flock_count);
259 } 257 }
260 dout("counted %d flock locks and %d fcntl locks", 258 dout("counted %d flock locks and %d fcntl locks",
261 *flock_count, *fcntl_count); 259 *flock_count, *fcntl_count);
@@ -271,6 +269,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
271 int num_fcntl_locks, int num_flock_locks) 269 int num_fcntl_locks, int num_flock_locks)
272{ 270{
273 struct file_lock *lock; 271 struct file_lock *lock;
272 struct file_lock_context *ctx = inode->i_flctx;
274 int err = 0; 273 int err = 0;
275 int seen_fcntl = 0; 274 int seen_fcntl = 0;
276 int seen_flock = 0; 275 int seen_flock = 0;
@@ -279,33 +278,34 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
279 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 278 dout("encoding %d flock and %d fcntl locks", num_flock_locks,
280 num_fcntl_locks); 279 num_fcntl_locks);
281 280
282 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 281 if (!ctx)
283 if (lock->fl_flags & FL_POSIX) { 282 return 0;
284 ++seen_fcntl; 283
285 if (seen_fcntl > num_fcntl_locks) { 284 spin_lock(&ctx->flc_lock);
286 err = -ENOSPC; 285 list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
287 goto fail; 286 ++seen_fcntl;
288 } 287 if (seen_fcntl > num_fcntl_locks) {
289 err = lock_to_ceph_filelock(lock, &flocks[l]); 288 err = -ENOSPC;
290 if (err) 289 goto fail;
291 goto fail;
292 ++l;
293 } 290 }
291 err = lock_to_ceph_filelock(lock, &flocks[l]);
292 if (err)
293 goto fail;
294 ++l;
294 } 295 }
295 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 296 list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
296 if (lock->fl_flags & FL_FLOCK) { 297 ++seen_flock;
297 ++seen_flock; 298 if (seen_flock > num_flock_locks) {
298 if (seen_flock > num_flock_locks) { 299 err = -ENOSPC;
299 err = -ENOSPC; 300 goto fail;
300 goto fail;
301 }
302 err = lock_to_ceph_filelock(lock, &flocks[l]);
303 if (err)
304 goto fail;
305 ++l;
306 } 301 }
302 err = lock_to_ceph_filelock(lock, &flocks[l]);
303 if (err)
304 goto fail;
305 ++l;
307 } 306 }
308fail: 307fail:
308 spin_unlock(&ctx->flc_lock);
309 return err; 309 return err;
310} 310}
311 311
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d2171f4a6980..5f62fb7a5d0a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2700,20 +2700,16 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2700 struct ceph_filelock *flocks; 2700 struct ceph_filelock *flocks;
2701 2701
2702encode_again: 2702encode_again:
2703 spin_lock(&inode->i_lock);
2704 ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); 2703 ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
2705 spin_unlock(&inode->i_lock);
2706 flocks = kmalloc((num_fcntl_locks+num_flock_locks) * 2704 flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
2707 sizeof(struct ceph_filelock), GFP_NOFS); 2705 sizeof(struct ceph_filelock), GFP_NOFS);
2708 if (!flocks) { 2706 if (!flocks) {
2709 err = -ENOMEM; 2707 err = -ENOMEM;
2710 goto out_free; 2708 goto out_free;
2711 } 2709 }
2712 spin_lock(&inode->i_lock);
2713 err = ceph_encode_locks_to_buffer(inode, flocks, 2710 err = ceph_encode_locks_to_buffer(inode, flocks,
2714 num_fcntl_locks, 2711 num_fcntl_locks,
2715 num_flock_locks); 2712 num_flock_locks);
2716 spin_unlock(&inode->i_lock);
2717 if (err) { 2713 if (err) {
2718 kfree(flocks); 2714 kfree(flocks);
2719 if (err == -ENOSPC) 2715 if (err == -ENOSPC)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 50f06cddc94b..5ae62587a71d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -40,17 +40,6 @@ static void ceph_put_super(struct super_block *s)
40 40
41 dout("put_super\n"); 41 dout("put_super\n");
42 ceph_mdsc_close_sessions(fsc->mdsc); 42 ceph_mdsc_close_sessions(fsc->mdsc);
43
44 /*
45 * ensure we release the bdi before put_anon_super releases
46 * the device name.
47 */
48 if (s->s_bdi == &fsc->backing_dev_info) {
49 bdi_unregister(&fsc->backing_dev_info);
50 s->s_bdi = NULL;
51 }
52
53 return;
54} 43}
55 44
56static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) 45static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -910,7 +899,7 @@ static int ceph_register_bdi(struct super_block *sb,
910 >> PAGE_SHIFT; 899 >> PAGE_SHIFT;
911 else 900 else
912 fsc->backing_dev_info.ra_pages = 901 fsc->backing_dev_info.ra_pages =
913 default_backing_dev_info.ra_pages; 902 VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
914 903
915 err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", 904 err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
916 atomic_long_inc_return(&bdi_seq)); 905 atomic_long_inc_return(&bdi_seq));
@@ -1002,11 +991,16 @@ out_final:
1002static void ceph_kill_sb(struct super_block *s) 991static void ceph_kill_sb(struct super_block *s)
1003{ 992{
1004 struct ceph_fs_client *fsc = ceph_sb_to_client(s); 993 struct ceph_fs_client *fsc = ceph_sb_to_client(s);
994 dev_t dev = s->s_dev;
995
1005 dout("kill_sb %p\n", s); 996 dout("kill_sb %p\n", s);
997
1006 ceph_mdsc_pre_umount(fsc->mdsc); 998 ceph_mdsc_pre_umount(fsc->mdsc);
1007 kill_anon_super(s); /* will call put_super after sb is r/o */ 999 generic_shutdown_super(s);
1008 ceph_mdsc_destroy(fsc); 1000 ceph_mdsc_destroy(fsc);
1001
1009 destroy_fs_client(fsc); 1002 destroy_fs_client(fsc);
1003 free_anon_bdev(dev);
1010} 1004}
1011 1005
1012static struct file_system_type ceph_fs_type = { 1006static struct file_system_type ceph_fs_type = {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 67b2007f10fe..ea06a3d0364c 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -24,27 +24,6 @@
24 24
25#include "internal.h" 25#include "internal.h"
26 26
27/*
28 * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
29 * devices
30 * - permits shared-mmap for read, write and/or exec
31 * - does not permit private mmap in NOMMU mode (can't do COW)
32 * - no readahead or I/O queue unplugging required
33 */
34struct backing_dev_info directly_mappable_cdev_bdi = {
35 .name = "char",
36 .capabilities = (
37#ifdef CONFIG_MMU
38 /* permit private copies of the data to be taken */
39 BDI_CAP_MAP_COPY |
40#endif
41 /* permit direct mmap, for read, write or exec */
42 BDI_CAP_MAP_DIRECT |
43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
44 /* no writeback happens */
45 BDI_CAP_NO_ACCT_AND_WRITEBACK),
46};
47
48static struct kobj_map *cdev_map; 27static struct kobj_map *cdev_map;
49 28
50static DEFINE_MUTEX(chrdevs_lock); 29static DEFINE_MUTEX(chrdevs_lock);
@@ -575,8 +554,6 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data)
575void __init chrdev_init(void) 554void __init chrdev_init(void)
576{ 555{
577 cdev_map = kobj_map_init(base_probe, &chrdevs_lock); 556 cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
578 if (bdi_init(&directly_mappable_cdev_bdi))
579 panic("Failed to init directly mappable cdev bdi");
580} 557}
581 558
582 559
@@ -590,4 +567,3 @@ EXPORT_SYMBOL(cdev_del);
590EXPORT_SYMBOL(cdev_add); 567EXPORT_SYMBOL(cdev_add);
591EXPORT_SYMBOL(__register_chrdev); 568EXPORT_SYMBOL(__register_chrdev);
592EXPORT_SYMBOL(__unregister_chrdev); 569EXPORT_SYMBOL(__unregister_chrdev);
593EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9c56ef776407..7febcf2475c5 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -606,9 +606,11 @@ cifs_security_flags_handle_must_flags(unsigned int *flags)
606 *flags = CIFSSEC_MUST_NTLMV2; 606 *flags = CIFSSEC_MUST_NTLMV2;
607 else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM) 607 else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM)
608 *flags = CIFSSEC_MUST_NTLM; 608 *flags = CIFSSEC_MUST_NTLM;
609 else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) 609 else if (CIFSSEC_MUST_LANMAN &&
610 (*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN)
610 *flags = CIFSSEC_MUST_LANMAN; 611 *flags = CIFSSEC_MUST_LANMAN;
611 else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) 612 else if (CIFSSEC_MUST_PLNTXT &&
613 (*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT)
612 *flags = CIFSSEC_MUST_PLNTXT; 614 *flags = CIFSSEC_MUST_PLNTXT;
613 615
614 *flags |= signflags; 616 *flags |= signflags;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2a772da16b83..d3aa999ab785 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3446,7 +3446,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
3446 int referral_walks_count = 0; 3446 int referral_walks_count = 0;
3447#endif 3447#endif
3448 3448
3449 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 3449 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs");
3450 if (rc) 3450 if (rc)
3451 return rc; 3451 return rc;
3452 3452
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 96b7e9b7706d..8fe1f7a21b3e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -366,6 +366,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
366 struct cifsLockInfo *li, *tmp; 366 struct cifsLockInfo *li, *tmp;
367 struct cifs_fid fid; 367 struct cifs_fid fid;
368 struct cifs_pending_open open; 368 struct cifs_pending_open open;
369 bool oplock_break_cancelled;
369 370
370 spin_lock(&cifs_file_list_lock); 371 spin_lock(&cifs_file_list_lock);
371 if (--cifs_file->count > 0) { 372 if (--cifs_file->count > 0) {
@@ -397,7 +398,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
397 } 398 }
398 spin_unlock(&cifs_file_list_lock); 399 spin_unlock(&cifs_file_list_lock);
399 400
400 cancel_work_sync(&cifs_file->oplock_break); 401 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
401 402
402 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 403 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
403 struct TCP_Server_Info *server = tcon->ses->server; 404 struct TCP_Server_Info *server = tcon->ses->server;
@@ -409,6 +410,9 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
409 _free_xid(xid); 410 _free_xid(xid);
410 } 411 }
411 412
413 if (oplock_break_cancelled)
414 cifs_done_oplock_break(cifsi);
415
412 cifs_del_pending_open(&open); 416 cifs_del_pending_open(&open);
413 417
414 /* 418 /*
@@ -1109,11 +1113,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1109 return rc; 1113 return rc;
1110} 1114}
1111 1115
1112/* copied from fs/locks.c with a name change */
1113#define cifs_for_each_lock(inode, lockp) \
1114 for (lockp = &inode->i_flock; *lockp != NULL; \
1115 lockp = &(*lockp)->fl_next)
1116
1117struct lock_to_push { 1116struct lock_to_push {
1118 struct list_head llist; 1117 struct list_head llist;
1119 __u64 offset; 1118 __u64 offset;
@@ -1128,8 +1127,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1128{ 1127{
1129 struct inode *inode = cfile->dentry->d_inode; 1128 struct inode *inode = cfile->dentry->d_inode;
1130 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1129 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1131 struct file_lock *flock, **before; 1130 struct file_lock *flock;
1132 unsigned int count = 0, i = 0; 1131 struct file_lock_context *flctx = inode->i_flctx;
1132 unsigned int i;
1133 int rc = 0, xid, type; 1133 int rc = 0, xid, type;
1134 struct list_head locks_to_send, *el; 1134 struct list_head locks_to_send, *el;
1135 struct lock_to_push *lck, *tmp; 1135 struct lock_to_push *lck, *tmp;
@@ -1137,21 +1137,17 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1137 1137
1138 xid = get_xid(); 1138 xid = get_xid();
1139 1139
1140 spin_lock(&inode->i_lock); 1140 if (!flctx)
1141 cifs_for_each_lock(inode, before) { 1141 goto out;
1142 if ((*before)->fl_flags & FL_POSIX)
1143 count++;
1144 }
1145 spin_unlock(&inode->i_lock);
1146 1142
1147 INIT_LIST_HEAD(&locks_to_send); 1143 INIT_LIST_HEAD(&locks_to_send);
1148 1144
1149 /* 1145 /*
1150 * Allocating count locks is enough because no FL_POSIX locks can be 1146 * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks
1151 * added to the list while we are holding cinode->lock_sem that 1147 * can be added to the list while we are holding cinode->lock_sem that
1152 * protects locking operations of this inode. 1148 * protects locking operations of this inode.
1153 */ 1149 */
1154 for (; i < count; i++) { 1150 for (i = 0; i < flctx->flc_posix_cnt; i++) {
1155 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1151 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1156 if (!lck) { 1152 if (!lck) {
1157 rc = -ENOMEM; 1153 rc = -ENOMEM;
@@ -1161,11 +1157,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1161 } 1157 }
1162 1158
1163 el = locks_to_send.next; 1159 el = locks_to_send.next;
1164 spin_lock(&inode->i_lock); 1160 spin_lock(&flctx->flc_lock);
1165 cifs_for_each_lock(inode, before) { 1161 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1166 flock = *before;
1167 if ((flock->fl_flags & FL_POSIX) == 0)
1168 continue;
1169 if (el == &locks_to_send) { 1162 if (el == &locks_to_send) {
1170 /* 1163 /*
1171 * The list ended. We don't have enough allocated 1164 * The list ended. We don't have enough allocated
@@ -1185,9 +1178,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1185 lck->length = length; 1178 lck->length = length;
1186 lck->type = type; 1179 lck->type = type;
1187 lck->offset = flock->fl_start; 1180 lck->offset = flock->fl_start;
1188 el = el->next;
1189 } 1181 }
1190 spin_unlock(&inode->i_lock); 1182 spin_unlock(&flctx->flc_lock);
1191 1183
1192 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1184 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1193 int stored_rc; 1185 int stored_rc;
@@ -3244,7 +3236,6 @@ static struct vm_operations_struct cifs_file_vm_ops = {
3244 .fault = filemap_fault, 3236 .fault = filemap_fault,
3245 .map_pages = filemap_map_pages, 3237 .map_pages = filemap_map_pages,
3246 .page_mkwrite = cifs_page_mkwrite, 3238 .page_mkwrite = cifs_page_mkwrite,
3247 .remap_pages = generic_file_remap_pages,
3248}; 3239};
3249 3240
3250int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 3241int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 0c3ce464cae4..2d4f37235ed0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -937,8 +937,6 @@ retry_iget5_locked:
937 inode->i_flags |= S_NOATIME | S_NOCMTIME; 937 inode->i_flags |= S_NOATIME | S_NOCMTIME;
938 if (inode->i_state & I_NEW) { 938 if (inode->i_state & I_NEW) {
939 inode->i_ino = hash; 939 inode->i_ino = hash;
940 if (S_ISREG(inode->i_mode))
941 inode->i_data.backing_dev_info = sb->s_bdi;
942#ifdef CONFIG_CIFS_FSCACHE 940#ifdef CONFIG_CIFS_FSCACHE
943 /* initialize per-inode cache cookie pointer */ 941 /* initialize per-inode cache cookie pointer */
944 CIFS_I(inode)->fscache = NULL; 942 CIFS_I(inode)->fscache = NULL;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 45cb59bcc791..8b7898b7670f 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -86,21 +86,16 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
86 } 86 }
87 87
88 src_inode = file_inode(src_file.file); 88 src_inode = file_inode(src_file.file);
89 rc = -EINVAL;
90 if (S_ISDIR(src_inode->i_mode))
91 goto out_fput;
89 92
90 /* 93 /*
91 * Note: cifs case is easier than btrfs since server responsible for 94 * Note: cifs case is easier than btrfs since server responsible for
92 * checks for proper open modes and file type and if it wants 95 * checks for proper open modes and file type and if it wants
93 * server could even support copy of range where source = target 96 * server could even support copy of range where source = target
94 */ 97 */
95 98 lock_two_nondirectories(target_inode, src_inode);
96 /* so we do not deadlock racing two ioctls on same files */
97 if (target_inode < src_inode) {
98 mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT);
99 mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
100 } else {
101 mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
102 mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD);
103 }
104 99
105 /* determine range to clone */ 100 /* determine range to clone */
106 rc = -EINVAL; 101 rc = -EINVAL;
@@ -124,13 +119,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
124out_unlock: 119out_unlock:
125 /* although unlocking in the reverse order from locking is not 120 /* although unlocking in the reverse order from locking is not
126 strictly necessary here it is a little cleaner to be consistent */ 121 strictly necessary here it is a little cleaner to be consistent */
127 if (target_inode < src_inode) { 122 unlock_two_nondirectories(src_inode, target_inode);
128 mutex_unlock(&src_inode->i_mutex);
129 mutex_unlock(&target_inode->i_mutex);
130 } else {
131 mutex_unlock(&target_inode->i_mutex);
132 mutex_unlock(&src_inode->i_mutex);
133 }
134out_fput: 123out_fput:
135 fdput(src_file); 124 fdput(src_file);
136out_drop_write: 125out_drop_write:
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 6c1566366a66..a4232ec4f2ba 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -221,7 +221,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16,
221 } 221 }
222 222
223 rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__le16)); 223 rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__le16));
224 memset(wpwd, 0, 129 * sizeof(__le16)); 224 memzero_explicit(wpwd, sizeof(wpwd));
225 225
226 return rc; 226 return rc;
227} 227}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index b945410bfcd5..82ec68b59208 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -183,7 +183,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
183 goto unlock_out; 183 goto unlock_out;
184 } 184 }
185 185
186 error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); 186 error = bdi_setup_and_register(&vc->bdi, "coda");
187 if (error) 187 if (error)
188 goto unlock_out; 188 goto unlock_out;
189 189
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index bd4a3c167091..a315677e44d3 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -70,8 +70,6 @@ extern int configfs_is_root(struct config_item *item);
70 70
71extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *); 71extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *, struct super_block *);
72extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *)); 72extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
73extern int configfs_inode_init(void);
74extern void configfs_inode_exit(void);
75 73
76extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 74extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
77extern int configfs_make_dirent(struct configfs_dirent *, 75extern int configfs_make_dirent(struct configfs_dirent *,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5946ad98053f..65af86147154 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -50,12 +50,6 @@ static const struct address_space_operations configfs_aops = {
50 .write_end = simple_write_end, 50 .write_end = simple_write_end,
51}; 51};
52 52
53static struct backing_dev_info configfs_backing_dev_info = {
54 .name = "configfs",
55 .ra_pages = 0, /* No readahead */
56 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
57};
58
59static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
60 .setattr = configfs_setattr, 54 .setattr = configfs_setattr,
61}; 55};
@@ -137,7 +131,6 @@ struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
137 if (inode) { 131 if (inode) {
138 inode->i_ino = get_next_ino(); 132 inode->i_ino = get_next_ino();
139 inode->i_mapping->a_ops = &configfs_aops; 133 inode->i_mapping->a_ops = &configfs_aops;
140 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
141 inode->i_op = &configfs_inode_operations; 134 inode->i_op = &configfs_inode_operations;
142 135
143 if (sd->s_iattr) { 136 if (sd->s_iattr) {
@@ -283,13 +276,3 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
283 } 276 }
284 mutex_unlock(&dir->d_inode->i_mutex); 277 mutex_unlock(&dir->d_inode->i_mutex);
285} 278}
286
287int __init configfs_inode_init(void)
288{
289 return bdi_init(&configfs_backing_dev_info);
290}
291
292void configfs_inode_exit(void)
293{
294 bdi_destroy(&configfs_backing_dev_info);
295}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index f6c285833390..da94e41bdbf6 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -145,19 +145,13 @@ static int __init configfs_init(void)
145 if (!config_kobj) 145 if (!config_kobj)
146 goto out2; 146 goto out2;
147 147
148 err = configfs_inode_init();
149 if (err)
150 goto out3;
151
152 err = register_filesystem(&configfs_fs_type); 148 err = register_filesystem(&configfs_fs_type);
153 if (err) 149 if (err)
154 goto out4; 150 goto out3;
155 151
156 return 0; 152 return 0;
157out4:
158 pr_err("Unable to register filesystem!\n");
159 configfs_inode_exit();
160out3: 153out3:
154 pr_err("Unable to register filesystem!\n");
161 kobject_put(config_kobj); 155 kobject_put(config_kobj);
162out2: 156out2:
163 kmem_cache_destroy(configfs_dir_cachep); 157 kmem_cache_destroy(configfs_dir_cachep);
@@ -172,7 +166,6 @@ static void __exit configfs_exit(void)
172 kobject_put(config_kobj); 166 kobject_put(config_kobj);
173 kmem_cache_destroy(configfs_dir_cachep); 167 kmem_cache_destroy(configfs_dir_cachep);
174 configfs_dir_cachep = NULL; 168 configfs_dir_cachep = NULL;
175 configfs_inode_exit();
176} 169}
177 170
178MODULE_AUTHOR("Oracle"); 171MODULE_AUTHOR("Oracle");
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index e7cfbaf8d0e2..1e6e227134d7 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -56,13 +56,8 @@ static int send_data(struct sk_buff *skb)
56{ 56{
57 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 57 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
58 void *data = genlmsg_data(genlhdr); 58 void *data = genlmsg_data(genlhdr);
59 int rv;
60 59
61 rv = genlmsg_end(skb, data); 60 genlmsg_end(skb, data);
62 if (rv < 0) {
63 nlmsg_free(skb);
64 return rv;
65 }
66 61
67 return genlmsg_unicast(&init_net, skb, listener_nlportid); 62 return genlmsg_unicast(&init_net, skb, listener_nlportid);
68} 63}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1686dc2da9fd..34b36a504059 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -67,7 +67,6 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque)
67 inode->i_ino = lower_inode->i_ino; 67 inode->i_ino = lower_inode->i_ino;
68 inode->i_version++; 68 inode->i_version++;
69 inode->i_mapping->a_ops = &ecryptfs_aops; 69 inode->i_mapping->a_ops = &ecryptfs_aops;
70 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
71 70
72 if (S_ISLNK(inode->i_mode)) 71 if (S_ISLNK(inode->i_mode))
73 inode->i_op = &ecryptfs_symlink_iops; 72 inode->i_op = &ecryptfs_symlink_iops;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d9eb84bda559..1895d60f4122 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -520,7 +520,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
520 goto out; 520 goto out;
521 } 521 }
522 522
523 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); 523 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs");
524 if (rc) 524 if (rc)
525 goto out1; 525 goto out1;
526 526
diff --git a/fs/efivarfs/Kconfig b/fs/efivarfs/Kconfig
index 367bbb10c543..c2499ef174a2 100644
--- a/fs/efivarfs/Kconfig
+++ b/fs/efivarfs/Kconfig
@@ -1,6 +1,7 @@
1config EFIVAR_FS 1config EFIVAR_FS
2 tristate "EFI Variable filesystem" 2 tristate "EFI Variable filesystem"
3 depends on EFI 3 depends on EFI
4 default m
4 help 5 help
5 efivarfs is a replacement filesystem for the old EFI 6 efivarfs is a replacement filesystem for the old EFI
6 variable support via sysfs, as it doesn't suffer from the 7 variable support via sysfs, as it doesn't suffer from the
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 6dad1176ec52..ddbce42548c9 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -140,7 +140,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
140 140
141 name[len] = '-'; 141 name[len] = '-';
142 142
143 efi_guid_unparse(&entry->var.VendorGuid, name + len + 1); 143 efi_guid_to_str(&entry->var.VendorGuid, name + len + 1);
144 144
145 name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; 145 name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
146 146
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f1d3d4eb8c4f..6fc91df99ff8 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1214,7 +1214,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1214 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1214 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1215 } 1215 }
1216 1216
1217 inode->i_mapping->backing_dev_info = sb->s_bdi;
1218 if (S_ISREG(inode->i_mode)) { 1217 if (S_ISREG(inode->i_mode)) {
1219 inode->i_op = &exofs_file_inode_operations; 1218 inode->i_op = &exofs_file_inode_operations;
1220 inode->i_fop = &exofs_file_operations; 1219 inode->i_fop = &exofs_file_operations;
@@ -1314,7 +1313,6 @@ struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
1314 1313
1315 set_obj_2bcreated(oi); 1314 set_obj_2bcreated(oi);
1316 1315
1317 inode->i_mapping->backing_dev_info = sb->s_bdi;
1318 inode_init_owner(inode, dir, mode); 1316 inode_init_owner(inode, dir, mode);
1319 inode->i_ino = sbi->s_nextid++; 1317 inode->i_ino = sbi->s_nextid++;
1320 inode->i_blkbits = EXOFS_BLKSHIFT; 1318 inode->i_blkbits = EXOFS_BLKSHIFT;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 95965503afcb..fcc2e565f540 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -836,7 +836,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
836 goto free_sbi; 836 goto free_sbi;
837 } 837 }
838 838
839 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); 839 ret = bdi_setup_and_register(&sbi->bdi, "exofs");
840 if (ret) { 840 if (ret) {
841 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); 841 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
842 dput(sb->s_root); 842 dput(sb->s_root);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 7d66fb0e4cca..6c14bb8322fa 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -170,7 +170,7 @@ static void ext2_preread_inode(struct inode *inode)
170 struct ext2_group_desc * gdp; 170 struct ext2_group_desc * gdp;
171 struct backing_dev_info *bdi; 171 struct backing_dev_info *bdi;
172 172
173 bdi = inode->i_mapping->backing_dev_info; 173 bdi = inode_to_bdi(inode);
174 if (bdi_read_congested(bdi)) 174 if (bdi_read_congested(bdi))
175 return; 175 return;
176 if (bdi_write_congested(bdi)) 176 if (bdi_write_congested(bdi))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9b4e7d750d4f..d4dbf3c259b3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -466,6 +466,8 @@ static void ext3_put_super (struct super_block * sb)
466 } 466 }
467 sb->s_fs_info = NULL; 467 sb->s_fs_info = NULL;
468 kfree(sbi->s_blockgroup_lock); 468 kfree(sbi->s_blockgroup_lock);
469 mutex_destroy(&sbi->s_orphan_lock);
470 mutex_destroy(&sbi->s_resize_lock);
469 kfree(sbi); 471 kfree(sbi);
470} 472}
471 473
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8131be8c0af3..7cb592386121 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -195,7 +195,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
195 .fault = filemap_fault, 195 .fault = filemap_fault,
196 .map_pages = filemap_map_pages, 196 .map_pages = filemap_map_pages,
197 .page_mkwrite = ext4_page_mkwrite, 197 .page_mkwrite = ext4_page_mkwrite,
198 .remap_pages = generic_file_remap_pages,
199}; 198};
200 199
201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 200static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 74c5f53595fb..64c39c7c594f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -334,7 +334,7 @@ static void save_error_info(struct super_block *sb, const char *func,
334static int block_device_ejected(struct super_block *sb) 334static int block_device_ejected(struct super_block *sb)
335{ 335{
336 struct inode *bd_inode = sb->s_bdev->bd_inode; 336 struct inode *bd_inode = sb->s_bdev->bd_inode;
337 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; 337 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
338 338
339 return bdi->dev == NULL; 339 return bdi->dev == NULL;
340} 340}
@@ -1046,10 +1046,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
1046static int ext4_write_info(struct super_block *sb, int type); 1046static int ext4_write_info(struct super_block *sb, int type);
1047static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1047static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1048 struct path *path); 1048 struct path *path);
1049static int ext4_quota_on_sysfile(struct super_block *sb, int type,
1050 int format_id);
1051static int ext4_quota_off(struct super_block *sb, int type); 1049static int ext4_quota_off(struct super_block *sb, int type);
1052static int ext4_quota_off_sysfile(struct super_block *sb, int type);
1053static int ext4_quota_on_mount(struct super_block *sb, int type); 1050static int ext4_quota_on_mount(struct super_block *sb, int type);
1054static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1051static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1055 size_t len, loff_t off); 1052 size_t len, loff_t off);
@@ -1084,16 +1081,6 @@ static const struct quotactl_ops ext4_qctl_operations = {
1084 .get_dqblk = dquot_get_dqblk, 1081 .get_dqblk = dquot_get_dqblk,
1085 .set_dqblk = dquot_set_dqblk 1082 .set_dqblk = dquot_set_dqblk
1086}; 1083};
1087
1088static const struct quotactl_ops ext4_qctl_sysfile_operations = {
1089 .quota_on_meta = ext4_quota_on_sysfile,
1090 .quota_off = ext4_quota_off_sysfile,
1091 .quota_sync = dquot_quota_sync,
1092 .get_info = dquot_get_dqinfo,
1093 .set_info = dquot_set_dqinfo,
1094 .get_dqblk = dquot_get_dqblk,
1095 .set_dqblk = dquot_set_dqblk
1096};
1097#endif 1084#endif
1098 1085
1099static const struct super_operations ext4_sops = { 1086static const struct super_operations ext4_sops = {
@@ -3935,7 +3922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3935#ifdef CONFIG_QUOTA 3922#ifdef CONFIG_QUOTA
3936 sb->dq_op = &ext4_quota_operations; 3923 sb->dq_op = &ext4_quota_operations;
3937 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) 3924 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
3938 sb->s_qcop = &ext4_qctl_sysfile_operations; 3925 sb->s_qcop = &dquot_quotactl_sysfile_ops;
3939 else 3926 else
3940 sb->s_qcop = &ext4_qctl_operations; 3927 sb->s_qcop = &ext4_qctl_operations;
3941 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 3928 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@ -5288,21 +5275,6 @@ static int ext4_enable_quotas(struct super_block *sb)
5288 return 0; 5275 return 0;
5289} 5276}
5290 5277
5291/*
5292 * quota_on function that is used when QUOTA feature is set.
5293 */
5294static int ext4_quota_on_sysfile(struct super_block *sb, int type,
5295 int format_id)
5296{
5297 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5298 return -EINVAL;
5299
5300 /*
5301 * USAGE was enabled at mount time. Only need to enable LIMITS now.
5302 */
5303 return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
5304}
5305
5306static int ext4_quota_off(struct super_block *sb, int type) 5278static int ext4_quota_off(struct super_block *sb, int type)
5307{ 5279{
5308 struct inode *inode = sb_dqopt(sb)->files[type]; 5280 struct inode *inode = sb_dqopt(sb)->files[type];
@@ -5329,18 +5301,6 @@ out:
5329 return dquot_quota_off(sb, type); 5301 return dquot_quota_off(sb, type);
5330} 5302}
5331 5303
5332/*
5333 * quota_off function that is used when QUOTA feature is set.
5334 */
5335static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5336{
5337 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5338 return -EINVAL;
5339
5340 /* Disable only the limits. */
5341 return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5342}
5343
5344/* Read data from quotafile - avoid pagecache and such because we cannot afford 5304/* Read data from quotafile - avoid pagecache and such because we cannot afford
5345 * acquiring the locks... As quota files are never truncated and quota code 5305 * acquiring the locks... As quota files are never truncated and quota code
5346 * itself serializes the operations (and no one else should touch the files) 5306 * itself serializes the operations (and no one else should touch the files)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c27e0ecb3bc..5674ba13102b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -92,7 +92,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
92 .fault = filemap_fault, 92 .fault = filemap_fault,
93 .map_pages = filemap_map_pages, 93 .map_pages = filemap_map_pages,
94 .page_mkwrite = f2fs_vm_page_mkwrite, 94 .page_mkwrite = f2fs_vm_page_mkwrite,
95 .remap_pages = generic_file_remap_pages,
96}; 95};
97 96
98static int get_parent_ino(struct inode *inode, nid_t *pino) 97static int get_parent_ino(struct inode *inode, nid_t *pino)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 2d609a5fbfea..c399152de397 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -66,15 +66,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
66} 66}
67EXPORT_SYMBOL(writeback_in_progress); 67EXPORT_SYMBOL(writeback_in_progress);
68 68
69static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) 69struct backing_dev_info *inode_to_bdi(struct inode *inode)
70{ 70{
71 struct super_block *sb = inode->i_sb; 71 struct super_block *sb;
72 72
73 if (sb_is_blkdev_sb(sb)) 73 if (!inode)
74 return inode->i_mapping->backing_dev_info; 74 return &noop_backing_dev_info;
75 75
76 sb = inode->i_sb;
77#ifdef CONFIG_BLOCK
78 if (sb_is_blkdev_sb(sb))
79 return blk_get_backing_dev_info(I_BDEV(inode));
80#endif
76 return sb->s_bdi; 81 return sb->s_bdi;
77} 82}
83EXPORT_SYMBOL_GPL(inode_to_bdi);
78 84
79static inline struct inode *wb_inode(struct list_head *head) 85static inline struct inode *wb_inode(struct list_head *head)
80{ 86{
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 760b2c552197..c01ec3bdcfd8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1159,7 +1159,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1159 mutex_lock(&inode->i_mutex); 1159 mutex_lock(&inode->i_mutex);
1160 1160
1161 /* We can write back this queue in page reclaim */ 1161 /* We can write back this queue in page reclaim */
1162 current->backing_dev_info = mapping->backing_dev_info; 1162 current->backing_dev_info = inode_to_bdi(inode);
1163 1163
1164 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1164 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1165 if (err) 1165 if (err)
@@ -1464,7 +1464,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1464{ 1464{
1465 struct inode *inode = req->inode; 1465 struct inode *inode = req->inode;
1466 struct fuse_inode *fi = get_fuse_inode(inode); 1466 struct fuse_inode *fi = get_fuse_inode(inode);
1467 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; 1467 struct backing_dev_info *bdi = inode_to_bdi(inode);
1468 int i; 1468 int i;
1469 1469
1470 list_del(&req->writepages_entry); 1470 list_del(&req->writepages_entry);
@@ -1658,7 +1658,7 @@ static int fuse_writepage_locked(struct page *page)
1658 req->end = fuse_writepage_end; 1658 req->end = fuse_writepage_end;
1659 req->inode = inode; 1659 req->inode = inode;
1660 1660
1661 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); 1661 inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK);
1662 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); 1662 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1663 1663
1664 spin_lock(&fc->lock); 1664 spin_lock(&fc->lock);
@@ -1768,7 +1768,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1768 1768
1769 if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || 1769 if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
1770 old_req->state == FUSE_REQ_PENDING)) { 1770 old_req->state == FUSE_REQ_PENDING)) {
1771 struct backing_dev_info *bdi = page->mapping->backing_dev_info; 1771 struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host);
1772 1772
1773 copy_highpage(old_req->pages[0], page); 1773 copy_highpage(old_req->pages[0], page);
1774 spin_unlock(&fc->lock); 1774 spin_unlock(&fc->lock);
@@ -1872,7 +1872,7 @@ static int fuse_writepages_fill(struct page *page,
1872 req->page_descs[req->num_pages].offset = 0; 1872 req->page_descs[req->num_pages].offset = 0;
1873 req->page_descs[req->num_pages].length = PAGE_SIZE; 1873 req->page_descs[req->num_pages].length = PAGE_SIZE;
1874 1874
1875 inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); 1875 inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK);
1876 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); 1876 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1877 1877
1878 err = 0; 1878 err = 0;
@@ -2062,7 +2062,6 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
2062 .fault = filemap_fault, 2062 .fault = filemap_fault,
2063 .map_pages = filemap_map_pages, 2063 .map_pages = filemap_map_pages,
2064 .page_mkwrite = fuse_page_mkwrite, 2064 .page_mkwrite = fuse_page_mkwrite,
2065 .remap_pages = generic_file_remap_pages,
2066}; 2065};
2067 2066
2068static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 2067static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f38256e4476e..e8799c11424b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -308,7 +308,6 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
308 if (!fc->writeback_cache || !S_ISREG(attr->mode)) 308 if (!fc->writeback_cache || !S_ISREG(attr->mode))
309 inode->i_flags |= S_NOCMTIME; 309 inode->i_flags |= S_NOCMTIME;
310 inode->i_generation = generation; 310 inode->i_generation = generation;
311 inode->i_data.backing_dev_info = &fc->bdi;
312 fuse_init_inode(inode, attr); 311 fuse_init_inode(inode, attr);
313 unlock_new_inode(inode); 312 unlock_new_inode(inode);
314 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { 313 } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3088e2a38e30..7b3143064af1 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -73,7 +73,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
73 73
74 BUG_ON(name == NULL); 74 BUG_ON(name == NULL);
75 75
76 if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) 76 if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
77 return -E2BIG; 77 return -E2BIG;
78 78
79 if (type == ACL_TYPE_ACCESS) { 79 if (type == ACL_TYPE_ACCESS) {
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 805b37fed638..4ad4f94edebe 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -289,7 +289,7 @@ continue_unlock:
289 if (!clear_page_dirty_for_io(page)) 289 if (!clear_page_dirty_for_io(page))
290 goto continue_unlock; 290 goto continue_unlock;
291 291
292 trace_wbc_writepage(wbc, mapping->backing_dev_info); 292 trace_wbc_writepage(wbc, inode_to_bdi(inode));
293 293
294 ret = __gfs2_jdata_writepage(page, wbc); 294 ret = __gfs2_jdata_writepage(page, wbc);
295 if (unlikely(ret)) { 295 if (unlikely(ret)) {
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c5a34f09e228..6371192961e2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1896,7 +1896,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1896 1896
1897 ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); 1897 ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
1898 if (ht == NULL) 1898 if (ht == NULL)
1899 ht = vzalloc(size); 1899 ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO,
1900 PAGE_KERNEL);
1900 if (!ht) 1901 if (!ht)
1901 return -ENOMEM; 1902 return -ENOMEM;
1902 1903
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6e600abf694a..ec9c2d33477a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -498,7 +498,6 @@ static const struct vm_operations_struct gfs2_vm_ops = {
498 .fault = filemap_fault, 498 .fault = filemap_fault,
499 .map_pages = filemap_map_pages, 499 .map_pages = filemap_map_pages,
500 .page_mkwrite = gfs2_page_mkwrite, 500 .page_mkwrite = gfs2_page_mkwrite,
501 .remap_pages = generic_file_remap_pages,
502}; 501};
503 502
504/** 503/**
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a23524aa3eac..f42dffba056a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -173,19 +173,14 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
173 spin_unlock(&lru_lock); 173 spin_unlock(&lru_lock);
174} 174}
175 175
176static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 176static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
177{ 177{
178 spin_lock(&lru_lock);
178 if (!list_empty(&gl->gl_lru)) { 179 if (!list_empty(&gl->gl_lru)) {
179 list_del_init(&gl->gl_lru); 180 list_del_init(&gl->gl_lru);
180 atomic_dec(&lru_count); 181 atomic_dec(&lru_count);
181 clear_bit(GLF_LRU, &gl->gl_flags); 182 clear_bit(GLF_LRU, &gl->gl_flags);
182 } 183 }
183}
184
185static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
186{
187 spin_lock(&lru_lock);
188 __gfs2_glock_remove_from_lru(gl);
189 spin_unlock(&lru_lock); 184 spin_unlock(&lru_lock);
190} 185}
191 186
@@ -205,9 +200,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
205 200
206 lockref_mark_dead(&gl->gl_lockref); 201 lockref_mark_dead(&gl->gl_lockref);
207 202
208 spin_lock(&lru_lock); 203 gfs2_glock_remove_from_lru(gl);
209 __gfs2_glock_remove_from_lru(gl);
210 spin_unlock(&lru_lock);
211 spin_unlock(&gl->gl_lockref.lock); 204 spin_unlock(&gl->gl_lockref.lock);
212 spin_lock_bucket(gl->gl_hash); 205 spin_lock_bucket(gl->gl_hash);
213 hlist_bl_del_rcu(&gl->gl_list); 206 hlist_bl_del_rcu(&gl->gl_list);
@@ -775,7 +768,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
775 mapping->flags = 0; 768 mapping->flags = 0;
776 mapping_set_gfp_mask(mapping, GFP_NOFS); 769 mapping_set_gfp_mask(mapping, GFP_NOFS);
777 mapping->private_data = NULL; 770 mapping->private_data = NULL;
778 mapping->backing_dev_info = s->s_bdi;
779 mapping->writeback_index = 0; 771 mapping->writeback_index = 0;
780 } 772 }
781 773
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9054002ebe70..73c72253faac 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -543,10 +543,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
543 } 543 }
544 544
545 error = gfs2_dir_add(&dip->i_inode, name, ip, da); 545 error = gfs2_dir_add(&dip->i_inode, name, ip, da);
546 if (error)
547 goto fail_end_trans;
548 546
549fail_end_trans:
550 gfs2_trans_end(sdp); 547 gfs2_trans_end(sdp);
551fail_ipreserv: 548fail_ipreserv:
552 gfs2_inplace_release(dip); 549 gfs2_inplace_release(dip);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 8633ad328ee2..efc8e254787c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -112,7 +112,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
112 mapping->flags = 0; 112 mapping->flags = 0;
113 mapping_set_gfp_mask(mapping, GFP_NOFS); 113 mapping_set_gfp_mask(mapping, GFP_NOFS);
114 mapping->private_data = NULL; 114 mapping->private_data = NULL;
115 mapping->backing_dev_info = sb->s_bdi;
116 mapping->writeback_index = 0; 115 mapping->writeback_index = 0;
117 116
118 spin_lock_init(&sdp->sd_log_lock); 117 spin_lock_init(&sdp->sd_log_lock);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c8b148bbdc8b..3e193cb36996 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -667,7 +667,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
667 667
668static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, 668static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
669 s64 change, struct gfs2_quota_data *qd, 669 s64 change, struct gfs2_quota_data *qd,
670 struct fs_disk_quota *fdq) 670 struct qc_dqblk *fdq)
671{ 671{
672 struct inode *inode = &ip->i_inode; 672 struct inode *inode = &ip->i_inode;
673 struct gfs2_sbd *sdp = GFS2_SB(inode); 673 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -697,16 +697,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
697 be64_add_cpu(&q.qu_value, change); 697 be64_add_cpu(&q.qu_value, change);
698 qd->qd_qb.qb_value = q.qu_value; 698 qd->qd_qb.qb_value = q.qu_value;
699 if (fdq) { 699 if (fdq) {
700 if (fdq->d_fieldmask & FS_DQ_BSOFT) { 700 if (fdq->d_fieldmask & QC_SPC_SOFT) {
701 q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); 701 q.qu_warn = cpu_to_be64(fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift);
702 qd->qd_qb.qb_warn = q.qu_warn; 702 qd->qd_qb.qb_warn = q.qu_warn;
703 } 703 }
704 if (fdq->d_fieldmask & FS_DQ_BHARD) { 704 if (fdq->d_fieldmask & QC_SPC_HARD) {
705 q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); 705 q.qu_limit = cpu_to_be64(fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift);
706 qd->qd_qb.qb_limit = q.qu_limit; 706 qd->qd_qb.qb_limit = q.qu_limit;
707 } 707 }
708 if (fdq->d_fieldmask & FS_DQ_BCOUNT) { 708 if (fdq->d_fieldmask & QC_SPACE) {
709 q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); 709 q.qu_value = cpu_to_be64(fdq->d_space >> sdp->sd_sb.sb_bsize_shift);
710 qd->qd_qb.qb_value = q.qu_value; 710 qd->qd_qb.qb_value = q.qu_value;
711 } 711 }
712 } 712 }
@@ -1497,7 +1497,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1497} 1497}
1498 1498
1499static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, 1499static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1500 struct fs_disk_quota *fdq) 1500 struct qc_dqblk *fdq)
1501{ 1501{
1502 struct gfs2_sbd *sdp = sb->s_fs_info; 1502 struct gfs2_sbd *sdp = sb->s_fs_info;
1503 struct gfs2_quota_lvb *qlvb; 1503 struct gfs2_quota_lvb *qlvb;
@@ -1505,7 +1505,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1505 struct gfs2_holder q_gh; 1505 struct gfs2_holder q_gh;
1506 int error; 1506 int error;
1507 1507
1508 memset(fdq, 0, sizeof(struct fs_disk_quota)); 1508 memset(fdq, 0, sizeof(*fdq));
1509 1509
1510 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1510 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1511 return -ESRCH; /* Crazy XFS error code */ 1511 return -ESRCH; /* Crazy XFS error code */
@@ -1522,12 +1522,9 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1522 goto out; 1522 goto out;
1523 1523
1524 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1524 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
1525 fdq->d_version = FS_DQUOT_VERSION; 1525 fdq->d_spc_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_sb.sb_bsize_shift;
1526 fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1526 fdq->d_spc_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_sb.sb_bsize_shift;
1527 fdq->d_id = from_kqid_munged(current_user_ns(), qid); 1527 fdq->d_space = be64_to_cpu(qlvb->qb_value) << sdp->sd_sb.sb_bsize_shift;
1528 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1529 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1530 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
1531 1528
1532 gfs2_glock_dq_uninit(&q_gh); 1529 gfs2_glock_dq_uninit(&q_gh);
1533out: 1530out:
@@ -1536,10 +1533,10 @@ out:
1536} 1533}
1537 1534
1538/* GFS2 only supports a subset of the XFS fields */ 1535/* GFS2 only supports a subset of the XFS fields */
1539#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) 1536#define GFS2_FIELDMASK (QC_SPC_SOFT|QC_SPC_HARD|QC_SPACE)
1540 1537
1541static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, 1538static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1542 struct fs_disk_quota *fdq) 1539 struct qc_dqblk *fdq)
1543{ 1540{
1544 struct gfs2_sbd *sdp = sb->s_fs_info; 1541 struct gfs2_sbd *sdp = sb->s_fs_info;
1545 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); 1542 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
@@ -1583,17 +1580,17 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1583 goto out_i; 1580 goto out_i;
1584 1581
1585 /* If nothing has changed, this is a no-op */ 1582 /* If nothing has changed, this is a no-op */
1586 if ((fdq->d_fieldmask & FS_DQ_BSOFT) && 1583 if ((fdq->d_fieldmask & QC_SPC_SOFT) &&
1587 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) 1584 ((fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
1588 fdq->d_fieldmask ^= FS_DQ_BSOFT; 1585 fdq->d_fieldmask ^= QC_SPC_SOFT;
1589 1586
1590 if ((fdq->d_fieldmask & FS_DQ_BHARD) && 1587 if ((fdq->d_fieldmask & QC_SPC_HARD) &&
1591 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) 1588 ((fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
1592 fdq->d_fieldmask ^= FS_DQ_BHARD; 1589 fdq->d_fieldmask ^= QC_SPC_HARD;
1593 1590
1594 if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && 1591 if ((fdq->d_fieldmask & QC_SPACE) &&
1595 ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) 1592 ((fdq->d_space >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
1596 fdq->d_fieldmask ^= FS_DQ_BCOUNT; 1593 fdq->d_fieldmask ^= QC_SPACE;
1597 1594
1598 if (fdq->d_fieldmask == 0) 1595 if (fdq->d_fieldmask == 0)
1599 goto out_i; 1596 goto out_i;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 573bd3b758fa..1b645773c98e 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -439,7 +439,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
439 439
440 ls->ls_recover_jid_done = jid; 440 ls->ls_recover_jid_done = jid;
441 ls->ls_recover_jid_status = message; 441 ls->ls_recover_jid_status = message;
442 sprintf(env_jid, "JID=%d", jid); 442 sprintf(env_jid, "JID=%u", jid);
443 sprintf(env_status, "RECOVERY=%s", 443 sprintf(env_status, "RECOVERY=%s",
444 message == LM_RD_SUCCESS ? "Done" : "Failed"); 444 message == LM_RD_SUCCESS ? "Done" : "Failed");
445 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); 445 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5b327f837de7..1666382b198d 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -743,7 +743,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
743 struct gfs2_inode *ip = GFS2_I(inode); 743 struct gfs2_inode *ip = GFS2_I(inode);
744 struct gfs2_sbd *sdp = GFS2_SB(inode); 744 struct gfs2_sbd *sdp = GFS2_SB(inode);
745 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); 745 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
746 struct backing_dev_info *bdi = metamapping->backing_dev_info; 746 struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
747 int ret = 0; 747 int ret = 0;
748 748
749 if (wbc->sync_mode == WB_SYNC_ALL) 749 if (wbc->sync_mode == WB_SYNC_ALL)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 3ab566ba5696..ae8e8811f0e8 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -96,7 +96,7 @@ static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
96 struct super_block *sb = sdp->sd_vfs; 96 struct super_block *sb = sdp->sd_vfs;
97 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1; 97 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
98 98
99 return snprintf(buf, PAGE_SIZE, "%u\n", frozen); 99 return snprintf(buf, PAGE_SIZE, "%d\n", frozen);
100} 100}
101 101
102static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 102static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5eba47f593f8..c274aca8e8dc 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -62,12 +62,6 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
62 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 62 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
63} 63}
64 64
65static struct backing_dev_info hugetlbfs_backing_dev_info = {
66 .name = "hugetlbfs",
67 .ra_pages = 0, /* No readahead */
68 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
69};
70
71int sysctl_hugetlb_shm_group; 65int sysctl_hugetlb_shm_group;
72 66
73enum { 67enum {
@@ -498,7 +492,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
498 lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, 492 lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
499 &hugetlbfs_i_mmap_rwsem_key); 493 &hugetlbfs_i_mmap_rwsem_key);
500 inode->i_mapping->a_ops = &hugetlbfs_aops; 494 inode->i_mapping->a_ops = &hugetlbfs_aops;
501 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
502 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 495 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
503 inode->i_mapping->private_data = resv_map; 496 inode->i_mapping->private_data = resv_map;
504 info = HUGETLBFS_I(inode); 497 info = HUGETLBFS_I(inode);
@@ -1032,10 +1025,6 @@ static int __init init_hugetlbfs_fs(void)
1032 return -ENOTSUPP; 1025 return -ENOTSUPP;
1033 } 1026 }
1034 1027
1035 error = bdi_init(&hugetlbfs_backing_dev_info);
1036 if (error)
1037 return error;
1038
1039 error = -ENOMEM; 1028 error = -ENOMEM;
1040 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 1029 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
1041 sizeof(struct hugetlbfs_inode_info), 1030 sizeof(struct hugetlbfs_inode_info),
@@ -1071,7 +1060,6 @@ static int __init init_hugetlbfs_fs(void)
1071 out: 1060 out:
1072 kmem_cache_destroy(hugetlbfs_inode_cachep); 1061 kmem_cache_destroy(hugetlbfs_inode_cachep);
1073 out2: 1062 out2:
1074 bdi_destroy(&hugetlbfs_backing_dev_info);
1075 return error; 1063 return error;
1076} 1064}
1077 1065
@@ -1091,7 +1079,6 @@ static void __exit exit_hugetlbfs_fs(void)
1091 for_each_hstate(h) 1079 for_each_hstate(h)
1092 kern_unmount(hugetlbfs_vfsmount[i++]); 1080 kern_unmount(hugetlbfs_vfsmount[i++]);
1093 unregister_filesystem(&hugetlbfs_fs_type); 1081 unregister_filesystem(&hugetlbfs_fs_type);
1094 bdi_destroy(&hugetlbfs_backing_dev_info);
1095} 1082}
1096 1083
1097module_init(init_hugetlbfs_fs) 1084module_init(init_hugetlbfs_fs)
diff --git a/fs/inode.c b/fs/inode.c
index aa149e7262ac..b7871577571d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -170,20 +170,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
170 atomic_set(&mapping->i_mmap_writable, 0); 170 atomic_set(&mapping->i_mmap_writable, 0);
171 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); 171 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
172 mapping->private_data = NULL; 172 mapping->private_data = NULL;
173 mapping->backing_dev_info = &default_backing_dev_info;
174 mapping->writeback_index = 0; 173 mapping->writeback_index = 0;
175
176 /*
177 * If the block_device provides a backing_dev_info for client
178 * inodes then use that. Otherwise the inode share the bdev's
179 * backing_dev_info.
180 */
181 if (sb->s_bdev) {
182 struct backing_dev_info *bdi;
183
184 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
185 mapping->backing_dev_info = bdi;
186 }
187 inode->i_private = NULL; 174 inode->i_private = NULL;
188 inode->i_mapping = mapping; 175 inode->i_mapping = mapping;
189 INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ 176 INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
@@ -194,7 +181,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
194#ifdef CONFIG_FSNOTIFY 181#ifdef CONFIG_FSNOTIFY
195 inode->i_fsnotify_mask = 0; 182 inode->i_fsnotify_mask = 0;
196#endif 183#endif
197 184 inode->i_flctx = NULL;
198 this_cpu_inc(nr_inodes); 185 this_cpu_inc(nr_inodes);
199 186
200 return 0; 187 return 0;
@@ -237,6 +224,7 @@ void __destroy_inode(struct inode *inode)
237 BUG_ON(inode_has_buffers(inode)); 224 BUG_ON(inode_has_buffers(inode));
238 security_inode_free(inode); 225 security_inode_free(inode);
239 fsnotify_inode_delete(inode); 226 fsnotify_inode_delete(inode);
227 locks_free_lock_context(inode->i_flctx);
240 if (!inode->i_nlink) { 228 if (!inode->i_nlink) {
241 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0); 229 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
242 atomic_long_dec(&inode->i_sb->s_remove_count); 230 atomic_long_dec(&inode->i_sb->s_remove_count);
@@ -355,7 +343,6 @@ void address_space_init_once(struct address_space *mapping)
355 INIT_LIST_HEAD(&mapping->private_list); 343 INIT_LIST_HEAD(&mapping->private_list);
356 spin_lock_init(&mapping->private_lock); 344 spin_lock_init(&mapping->private_lock);
357 mapping->i_mmap = RB_ROOT; 345 mapping->i_mmap = RB_ROOT;
358 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
359} 346}
360EXPORT_SYMBOL(address_space_init_once); 347EXPORT_SYMBOL(address_space_init_once);
361 348
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 214c3c11fbc2..5d01d2638ca5 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -379,6 +379,11 @@ int __generic_block_fiemap(struct inode *inode,
379 past_eof = true; 379 past_eof = true;
380 } 380 }
381 cond_resched(); 381 cond_resched();
382 if (fatal_signal_pending(current)) {
383 ret = -EINTR;
384 break;
385 }
386
382 } while (1); 387 } while (1);
383 388
384 /* If ret is 1 then we just hit the end of the extent array */ 389 /* If ret is 1 then we just hit the end of the extent array */
diff --git a/fs/isofs/util.c b/fs/isofs/util.c
index 01e1ee7a998b..005a15cfd30a 100644
--- a/fs/isofs/util.c
+++ b/fs/isofs/util.c
@@ -2,6 +2,7 @@
2 * linux/fs/isofs/util.c 2 * linux/fs/isofs/util.c
3 */ 3 */
4 4
5#include <linux/time.h>
5#include "isofs.h" 6#include "isofs.h"
6 7
7/* 8/*
@@ -17,9 +18,9 @@
17int iso_date(char * p, int flag) 18int iso_date(char * p, int flag)
18{ 19{
19 int year, month, day, hour, minute, second, tz; 20 int year, month, day, hour, minute, second, tz;
20 int crtime, days, i; 21 int crtime;
21 22
22 year = p[0] - 70; 23 year = p[0];
23 month = p[1]; 24 month = p[1];
24 day = p[2]; 25 day = p[2];
25 hour = p[3]; 26 hour = p[3];
@@ -31,18 +32,7 @@ int iso_date(char * p, int flag)
31 if (year < 0) { 32 if (year < 0) {
32 crtime = 0; 33 crtime = 0;
33 } else { 34 } else {
34 int monlen[12] = {31,28,31,30,31,30,31,31,30,31,30,31}; 35 crtime = mktime64(year+1900, month, day, hour, minute, second);
35
36 days = year * 365;
37 if (year > 2)
38 days += (year+1) / 4;
39 for (i = 1; i < month; i++)
40 days += monlen[i-1];
41 if (((year+2) % 4) == 0 && month > 2)
42 days++;
43 days += day - 1;
44 crtime = ((((days * 24) + hour) * 60 + minute) * 60)
45 + second;
46 36
47 /* sign extend */ 37 /* sign extend */
48 if (tz & 0x80) 38 if (tz & 0x80)
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
deleted file mode 100644
index fa92f7f1d0d0..000000000000
--- a/fs/jfs/endian24.h
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 * Copyright (C) International Business Machines Corp., 2001
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18#ifndef _H_ENDIAN24
19#define _H_ENDIAN24
20
21/*
22 * endian24.h:
23 *
24 * Endian conversion for 24-byte data
25 *
26 */
27#define __swab24(x) \
28({ \
29 __u32 __x = (x); \
30 ((__u32)( \
31 ((__x & (__u32)0x000000ffUL) << 16) | \
32 (__x & (__u32)0x0000ff00UL) | \
33 ((__x & (__u32)0x00ff0000UL) >> 16) )); \
34})
35
36#if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN))
37 #define __cpu_to_le24(x) ((__u32)(x))
38 #define __le24_to_cpu(x) ((__u32)(x))
39#else
40 #define __cpu_to_le24(x) __swab24(x)
41 #define __le24_to_cpu(x) __swab24(x)
42#endif
43
44#ifdef __KERNEL__
45 #define cpu_to_le24 __cpu_to_le24
46 #define le24_to_cpu __le24_to_cpu
47#endif
48
49#endif /* !_H_ENDIAN24 */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 984c2bbf4f61..d88576e23fe4 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -1040,8 +1040,8 @@ static int dtSplitUp(tid_t tid,
1040 pxdlist.maxnpxd = 1; 1040 pxdlist.maxnpxd = 1;
1041 pxdlist.npxd = 0; 1041 pxdlist.npxd = 0;
1042 pxd = &pxdlist.pxd[0]; 1042 pxd = &pxdlist.pxd[0];
1043 PXDaddress(pxd, nxaddr) 1043 PXDaddress(pxd, nxaddr);
1044 PXDlength(pxd, xlen + n); 1044 PXDlength(pxd, xlen + n);
1045 split->pxdlist = &pxdlist; 1045 split->pxdlist = &pxdlist;
1046 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1046 if ((rc = dtExtendPage(tid, ip, split, btstack))) {
1047 nxaddr = addressPXD(pxd); 1047 nxaddr = addressPXD(pxd);
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 43ea3713c083..8f602dcb51fa 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -30,8 +30,6 @@
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/nls.h> 31#include <linux/nls.h>
32 32
33#include "endian24.h"
34
35/* 33/*
36 * transaction and lock id's 34 * transaction and lock id's
37 * 35 *
@@ -59,26 +57,42 @@ struct timestruc_t {
59 57
60/* 58/*
61 * physical xd (pxd) 59 * physical xd (pxd)
60 *
61 * The leftmost 24 bits of len_addr are the extent length.
62 * The rightmost 8 bits of len_addr are the most signficant bits of
63 * the extent address
62 */ 64 */
63typedef struct { 65typedef struct {
64 unsigned len:24; 66 __le32 len_addr;
65 unsigned addr1:8;
66 __le32 addr2; 67 __le32 addr2;
67} pxd_t; 68} pxd_t;
68 69
69/* xd_t field construction */ 70/* xd_t field construction */
70 71
71#define PXDlength(pxd, length32) ((pxd)->len = __cpu_to_le24(length32)) 72static inline void PXDlength(pxd_t *pxd, __u32 len)
72#define PXDaddress(pxd, address64)\ 73{
73{\ 74 pxd->len_addr = (pxd->len_addr & cpu_to_le32(~0xffffff)) |
74 (pxd)->addr1 = ((s64)address64) >> 32;\ 75 cpu_to_le32(len & 0xffffff);
75 (pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ 76}
77
78static inline void PXDaddress(pxd_t *pxd, __u64 addr)
79{
80 pxd->len_addr = (pxd->len_addr & cpu_to_le32(0xffffff)) |
81 cpu_to_le32((addr >> 32)<<24);
82 pxd->addr2 = cpu_to_le32(addr & 0xffffffff);
76} 83}
77 84
78/* xd_t field extraction */ 85/* xd_t field extraction */
79#define lengthPXD(pxd) __le24_to_cpu((pxd)->len) 86static inline __u32 lengthPXD(pxd_t *pxd)
80#define addressPXD(pxd)\ 87{
81 ( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2)) 88 return le32_to_cpu((pxd)->len_addr) & 0xffffff;
89}
90
91static inline __u64 addressPXD(pxd_t *pxd)
92{
93 __u64 n = le32_to_cpu(pxd->len_addr) & ~0xffffff;
94 return (n << 8) + le32_to_cpu(pxd->addr2);
95}
82 96
83#define MAXTREEHEIGHT 8 97#define MAXTREEHEIGHT 8
84/* pxd list */ 98/* pxd list */
@@ -93,12 +107,10 @@ struct pxdlist {
93 * data extent descriptor (dxd) 107 * data extent descriptor (dxd)
94 */ 108 */
95typedef struct { 109typedef struct {
96 unsigned flag:8; /* 1: flags */ 110 __u8 flag; /* 1: flags */
97 unsigned rsrvd:24; 111 __u8 rsrvd[3];
98 __le32 size; /* 4: size in byte */ 112 __le32 size; /* 4: size in byte */
99 unsigned len:24; /* 3: length in unit of fsblksize */ 113 pxd_t loc; /* 8: address and length in unit of fsblksize */
100 unsigned addr1:8; /* 1: address in unit of fsblksize */
101 __le32 addr2; /* 4: address in unit of fsblksize */
102} dxd_t; /* - 16 - */ 114} dxd_t; /* - 16 - */
103 115
104/* dxd_t flags */ 116/* dxd_t flags */
@@ -109,12 +121,11 @@ typedef struct {
109#define DXD_CORRUPT 0x08 /* Inconsistency detected */ 121#define DXD_CORRUPT 0x08 /* Inconsistency detected */
110 122
111/* dxd_t field construction 123/* dxd_t field construction
112 * Conveniently, the PXD macros work for DXD
113 */ 124 */
114#define DXDlength PXDlength 125#define DXDlength(dxd, len) PXDlength(&(dxd)->loc, len)
115#define DXDaddress PXDaddress 126#define DXDaddress(dxd, addr) PXDaddress(&(dxd)->loc, addr)
116#define lengthDXD lengthPXD 127#define lengthDXD(dxd) lengthPXD(&(dxd)->loc)
117#define addressDXD addressPXD 128#define addressDXD(dxd) addressPXD(&(dxd)->loc)
118#define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32)) 129#define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32))
119#define sizeDXD(dxd) le32_to_cpu((dxd)->size) 130#define sizeDXD(dxd) le32_to_cpu((dxd)->size)
120 131
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 08c0c749b986..1e0987986d5f 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -29,13 +29,11 @@
29 * extent allocation descriptor (xad) 29 * extent allocation descriptor (xad)
30 */ 30 */
31typedef struct xad { 31typedef struct xad {
32 unsigned flag:8; /* 1: flag */ 32 __u8 flag; /* 1: flag */
33 unsigned rsvrd:16; /* 2: reserved */ 33 __u8 rsvrd[2]; /* 2: reserved */
34 unsigned off1:8; /* 1: offset in unit of fsblksize */ 34 __u8 off1; /* 1: offset in unit of fsblksize */
35 __le32 off2; /* 4: offset in unit of fsblksize */ 35 __le32 off2; /* 4: offset in unit of fsblksize */
36 unsigned len:24; /* 3: length in unit of fsblksize */ 36 pxd_t loc; /* 8: length and address in unit of fsblksize */
37 unsigned addr1:8; /* 1: address in unit of fsblksize */
38 __le32 addr2; /* 4: address in unit of fsblksize */
39} xad_t; /* (16) */ 37} xad_t; /* (16) */
40 38
41#define MAXXLEN ((1 << 24) - 1) 39#define MAXXLEN ((1 << 24) - 1)
@@ -49,19 +47,14 @@ typedef struct xad {
49 (xad)->off1 = ((u64)offset64) >> 32;\ 47 (xad)->off1 = ((u64)offset64) >> 32;\
50 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ 48 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
51} 49}
52#define XADaddress(xad, address64)\ 50#define XADaddress(xad, address64) PXDaddress(&(xad)->loc, address64)
53{\ 51#define XADlength(xad, length32) PXDlength(&(xad)->loc, length32)
54 (xad)->addr1 = ((u64)address64) >> 32;\
55 (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
56}
57#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
58 52
59/* xad_t field extraction */ 53/* xad_t field extraction */
60#define offsetXAD(xad)\ 54#define offsetXAD(xad)\
61 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) 55 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
62#define addressXAD(xad)\ 56#define addressXAD(xad) addressPXD(&(xad)->loc)
63 ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) 57#define lengthXAD(xad) lengthPXD(&(xad)->loc)
64#define lengthXAD(xad) __le24_to_cpu((xad)->len)
65 58
66/* xad list */ 59/* xad list */
67struct xadlist { 60struct xadlist {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 16c3a9556634..5d30c56ae075 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -619,8 +619,7 @@ out_mount_failed:
619 iput(sbi->direct_inode); 619 iput(sbi->direct_inode);
620 sbi->direct_inode = NULL; 620 sbi->direct_inode = NULL;
621out_unload: 621out_unload:
622 if (sbi->nls_tab) 622 unload_nls(sbi->nls_tab);
623 unload_nls(sbi->nls_tab);
624out_kfree: 623out_kfree:
625 kfree(sbi); 624 kfree(sbi);
626 return ret; 625 return ret;
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 985217626e66..9000874a945b 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -24,12 +24,6 @@ static const struct address_space_operations kernfs_aops = {
24 .write_end = simple_write_end, 24 .write_end = simple_write_end,
25}; 25};
26 26
27static struct backing_dev_info kernfs_bdi = {
28 .name = "kernfs",
29 .ra_pages = 0, /* No readahead */
30 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
31};
32
33static const struct inode_operations kernfs_iops = { 27static const struct inode_operations kernfs_iops = {
34 .permission = kernfs_iop_permission, 28 .permission = kernfs_iop_permission,
35 .setattr = kernfs_iop_setattr, 29 .setattr = kernfs_iop_setattr,
@@ -40,12 +34,6 @@ static const struct inode_operations kernfs_iops = {
40 .listxattr = kernfs_iop_listxattr, 34 .listxattr = kernfs_iop_listxattr,
41}; 35};
42 36
43void __init kernfs_inode_init(void)
44{
45 if (bdi_init(&kernfs_bdi))
46 panic("failed to init kernfs_bdi");
47}
48
49static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) 37static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
50{ 38{
51 static DEFINE_MUTEX(iattr_mutex); 39 static DEFINE_MUTEX(iattr_mutex);
@@ -298,7 +286,6 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
298 kernfs_get(kn); 286 kernfs_get(kn);
299 inode->i_private = kn; 287 inode->i_private = kn;
300 inode->i_mapping->a_ops = &kernfs_aops; 288 inode->i_mapping->a_ops = &kernfs_aops;
301 inode->i_mapping->backing_dev_info = &kernfs_bdi;
302 inode->i_op = &kernfs_iops; 289 inode->i_op = &kernfs_iops;
303 290
304 set_default_inode_attr(inode, kn->mode); 291 set_default_inode_attr(inode, kn->mode);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index dc84a3ef9ca2..af9fa7499919 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -88,7 +88,6 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name);
88ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, 88ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
89 size_t size); 89 size_t size);
90ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); 90ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
91void kernfs_inode_init(void);
92 91
93/* 92/*
94 * dir.c 93 * dir.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f973ae9b05f1..8eaf417187f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -246,5 +246,4 @@ void __init kernfs_init(void)
246 kernfs_node_cache = kmem_cache_create("kernfs_node_cache", 246 kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
247 sizeof(struct kernfs_node), 247 sizeof(struct kernfs_node),
248 0, SLAB_PANIC, NULL); 248 0, SLAB_PANIC, NULL);
249 kernfs_inode_init();
250} 249}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56598742dde4..5581e020644b 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(nlm_blocked_lock);
57static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) 57static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
58{ 58{
59 /* 59 /*
60 * We can get away with a static buffer because we're only 60 * We can get away with a static buffer because this is only called
61 * called with BKL held. 61 * from lockd, which is single-threaded.
62 */ 62 */
63 static char buf[2*NLM_MAXCOOKIELEN+1]; 63 static char buf[2*NLM_MAXCOOKIELEN+1];
64 unsigned int i, len = sizeof(buf); 64 unsigned int i, len = sizeof(buf);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d12ff4e2dbe7..665ef5a05183 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
164{ 164{
165 struct inode *inode = nlmsvc_file_inode(file); 165 struct inode *inode = nlmsvc_file_inode(file);
166 struct file_lock *fl; 166 struct file_lock *fl;
167 struct file_lock_context *flctx = inode->i_flctx;
167 struct nlm_host *lockhost; 168 struct nlm_host *lockhost;
168 169
170 if (!flctx || list_empty_careful(&flctx->flc_posix))
171 return 0;
169again: 172again:
170 file->f_locks = 0; 173 file->f_locks = 0;
171 spin_lock(&inode->i_lock); 174 spin_lock(&flctx->flc_lock);
172 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 175 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
173 if (fl->fl_lmops != &nlmsvc_lock_operations) 176 if (fl->fl_lmops != &nlmsvc_lock_operations)
174 continue; 177 continue;
175 178
@@ -180,7 +183,7 @@ again:
180 if (match(lockhost, host)) { 183 if (match(lockhost, host)) {
181 struct file_lock lock = *fl; 184 struct file_lock lock = *fl;
182 185
183 spin_unlock(&inode->i_lock); 186 spin_unlock(&flctx->flc_lock);
184 lock.fl_type = F_UNLCK; 187 lock.fl_type = F_UNLCK;
185 lock.fl_start = 0; 188 lock.fl_start = 0;
186 lock.fl_end = OFFSET_MAX; 189 lock.fl_end = OFFSET_MAX;
@@ -192,7 +195,7 @@ again:
192 goto again; 195 goto again;
193 } 196 }
194 } 197 }
195 spin_unlock(&inode->i_lock); 198 spin_unlock(&flctx->flc_lock);
196 199
197 return 0; 200 return 0;
198} 201}
@@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file)
223{ 226{
224 struct inode *inode = nlmsvc_file_inode(file); 227 struct inode *inode = nlmsvc_file_inode(file);
225 struct file_lock *fl; 228 struct file_lock *fl;
229 struct file_lock_context *flctx = inode->i_flctx;
226 230
227 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) 231 if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
228 return 1; 232 return 1;
229 233
230 spin_lock(&inode->i_lock); 234 if (flctx && !list_empty_careful(&flctx->flc_posix)) {
231 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 235 spin_lock(&flctx->flc_lock);
232 if (fl->fl_lmops == &nlmsvc_lock_operations) { 236 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
233 spin_unlock(&inode->i_lock); 237 if (fl->fl_lmops == &nlmsvc_lock_operations) {
234 return 1; 238 spin_unlock(&flctx->flc_lock);
239 return 1;
240 }
235 } 241 }
242 spin_unlock(&flctx->flc_lock);
236 } 243 }
237 spin_unlock(&inode->i_lock);
238 file->f_locks = 0; 244 file->f_locks = 0;
239 return 0; 245 return 0;
240} 246}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 9340e7e10ef6..5b651daad518 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -95,14 +95,6 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f)
95 return p + XDR_QUADLEN(NFS2_FHSIZE); 95 return p + XDR_QUADLEN(NFS2_FHSIZE);
96} 96}
97 97
98static inline __be32 *
99nlm_encode_fh(__be32 *p, struct nfs_fh *f)
100{
101 *p++ = htonl(NFS2_FHSIZE);
102 memcpy(p, f->data, NFS2_FHSIZE);
103 return p + XDR_QUADLEN(NFS2_FHSIZE);
104}
105
106/* 98/*
107 * Encode and decode owner handle 99 * Encode and decode owner handle
108 */ 100 */
diff --git a/fs/locks.c b/fs/locks.c
index 59e2f905e4ff..4753218f308e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -137,7 +137,7 @@
137 137
138#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 138#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
139#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 139#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
140#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) 140#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
141#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) 141#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
142 142
143static bool lease_breaking(struct file_lock *fl) 143static bool lease_breaking(struct file_lock *fl)
@@ -157,14 +157,11 @@ static int target_leasetype(struct file_lock *fl)
157int leases_enable = 1; 157int leases_enable = 1;
158int lease_break_time = 45; 158int lease_break_time = 45;
159 159
160#define for_each_lock(inode, lockp) \
161 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
162
163/* 160/*
164 * The global file_lock_list is only used for displaying /proc/locks, so we 161 * The global file_lock_list is only used for displaying /proc/locks, so we
165 * keep a list on each CPU, with each list protected by its own spinlock via 162 * keep a list on each CPU, with each list protected by its own spinlock via
166 * the file_lock_lglock. Note that alterations to the list also require that 163 * the file_lock_lglock. Note that alterations to the list also require that
167 * the relevant i_lock is held. 164 * the relevant flc_lock is held.
168 */ 165 */
169DEFINE_STATIC_LGLOCK(file_lock_lglock); 166DEFINE_STATIC_LGLOCK(file_lock_lglock);
170static DEFINE_PER_CPU(struct hlist_head, file_lock_list); 167static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
@@ -192,21 +189,68 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
192 * contrast to those that are acting as records of acquired locks). 189 * contrast to those that are acting as records of acquired locks).
193 * 190 *
194 * Note that when we acquire this lock in order to change the above fields, 191 * Note that when we acquire this lock in order to change the above fields,
195 * we often hold the i_lock as well. In certain cases, when reading the fields 192 * we often hold the flc_lock as well. In certain cases, when reading the fields
196 * protected by this lock, we can skip acquiring it iff we already hold the 193 * protected by this lock, we can skip acquiring it iff we already hold the
197 * i_lock. 194 * flc_lock.
198 * 195 *
199 * In particular, adding an entry to the fl_block list requires that you hold 196 * In particular, adding an entry to the fl_block list requires that you hold
200 * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting 197 * both the flc_lock and the blocked_lock_lock (acquired in that order).
201 * an entry from the list however only requires the file_lock_lock. 198 * Deleting an entry from the list however only requires the file_lock_lock.
202 */ 199 */
203static DEFINE_SPINLOCK(blocked_lock_lock); 200static DEFINE_SPINLOCK(blocked_lock_lock);
204 201
202static struct kmem_cache *flctx_cache __read_mostly;
205static struct kmem_cache *filelock_cache __read_mostly; 203static struct kmem_cache *filelock_cache __read_mostly;
206 204
205static struct file_lock_context *
206locks_get_lock_context(struct inode *inode)
207{
208 struct file_lock_context *new;
209
210 if (likely(inode->i_flctx))
211 goto out;
212
213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
214 if (!new)
215 goto out;
216
217 spin_lock_init(&new->flc_lock);
218 INIT_LIST_HEAD(&new->flc_flock);
219 INIT_LIST_HEAD(&new->flc_posix);
220 INIT_LIST_HEAD(&new->flc_lease);
221
222 /*
223 * Assign the pointer if it's not already assigned. If it is, then
224 * free the context we just allocated.
225 */
226 spin_lock(&inode->i_lock);
227 if (likely(!inode->i_flctx)) {
228 inode->i_flctx = new;
229 new = NULL;
230 }
231 spin_unlock(&inode->i_lock);
232
233 if (new)
234 kmem_cache_free(flctx_cache, new);
235out:
236 return inode->i_flctx;
237}
238
239void
240locks_free_lock_context(struct file_lock_context *ctx)
241{
242 if (ctx) {
243 WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
244 WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
245 WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
246 kmem_cache_free(flctx_cache, ctx);
247 }
248}
249
207static void locks_init_lock_heads(struct file_lock *fl) 250static void locks_init_lock_heads(struct file_lock *fl)
208{ 251{
209 INIT_HLIST_NODE(&fl->fl_link); 252 INIT_HLIST_NODE(&fl->fl_link);
253 INIT_LIST_HEAD(&fl->fl_list);
210 INIT_LIST_HEAD(&fl->fl_block); 254 INIT_LIST_HEAD(&fl->fl_block);
211 init_waitqueue_head(&fl->fl_wait); 255 init_waitqueue_head(&fl->fl_wait);
212} 256}
@@ -243,6 +287,7 @@ EXPORT_SYMBOL_GPL(locks_release_private);
243void locks_free_lock(struct file_lock *fl) 287void locks_free_lock(struct file_lock *fl)
244{ 288{
245 BUG_ON(waitqueue_active(&fl->fl_wait)); 289 BUG_ON(waitqueue_active(&fl->fl_wait));
290 BUG_ON(!list_empty(&fl->fl_list));
246 BUG_ON(!list_empty(&fl->fl_block)); 291 BUG_ON(!list_empty(&fl->fl_block));
247 BUG_ON(!hlist_unhashed(&fl->fl_link)); 292 BUG_ON(!hlist_unhashed(&fl->fl_link));
248 293
@@ -257,8 +302,8 @@ locks_dispose_list(struct list_head *dispose)
257 struct file_lock *fl; 302 struct file_lock *fl;
258 303
259 while (!list_empty(dispose)) { 304 while (!list_empty(dispose)) {
260 fl = list_first_entry(dispose, struct file_lock, fl_block); 305 fl = list_first_entry(dispose, struct file_lock, fl_list);
261 list_del_init(&fl->fl_block); 306 list_del_init(&fl->fl_list);
262 locks_free_lock(fl); 307 locks_free_lock(fl);
263 } 308 }
264} 309}
@@ -513,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
513 return fl1->fl_owner == fl2->fl_owner; 558 return fl1->fl_owner == fl2->fl_owner;
514} 559}
515 560
516/* Must be called with the i_lock held! */ 561/* Must be called with the flc_lock held! */
517static void locks_insert_global_locks(struct file_lock *fl) 562static void locks_insert_global_locks(struct file_lock *fl)
518{ 563{
519 lg_local_lock(&file_lock_lglock); 564 lg_local_lock(&file_lock_lglock);
@@ -522,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl)
522 lg_local_unlock(&file_lock_lglock); 567 lg_local_unlock(&file_lock_lglock);
523} 568}
524 569
525/* Must be called with the i_lock held! */ 570/* Must be called with the flc_lock held! */
526static void locks_delete_global_locks(struct file_lock *fl) 571static void locks_delete_global_locks(struct file_lock *fl)
527{ 572{
528 /* 573 /*
529 * Avoid taking lock if already unhashed. This is safe since this check 574 * Avoid taking lock if already unhashed. This is safe since this check
530 * is done while holding the i_lock, and new insertions into the list 575 * is done while holding the flc_lock, and new insertions into the list
531 * also require that it be held. 576 * also require that it be held.
532 */ 577 */
533 if (hlist_unhashed(&fl->fl_link)) 578 if (hlist_unhashed(&fl->fl_link))
@@ -579,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter)
579 * the order they blocked. The documentation doesn't require this but 624 * the order they blocked. The documentation doesn't require this but
580 * it seems like the reasonable thing to do. 625 * it seems like the reasonable thing to do.
581 * 626 *
582 * Must be called with both the i_lock and blocked_lock_lock held. The fl_block 627 * Must be called with both the flc_lock and blocked_lock_lock held. The
583 * list itself is protected by the blocked_lock_lock, but by ensuring that the 628 * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
584 * i_lock is also held on insertions we can avoid taking the blocked_lock_lock 629 * that the flc_lock is also held on insertions we can avoid taking the
585 * in some cases when we see that the fl_block list is empty. 630 * blocked_lock_lock in some cases when we see that the fl_block list is empty.
586 */ 631 */
587static void __locks_insert_block(struct file_lock *blocker, 632static void __locks_insert_block(struct file_lock *blocker,
588 struct file_lock *waiter) 633 struct file_lock *waiter)
@@ -594,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker,
594 locks_insert_global_blocked(waiter); 639 locks_insert_global_blocked(waiter);
595} 640}
596 641
597/* Must be called with i_lock held. */ 642/* Must be called with flc_lock held. */
598static void locks_insert_block(struct file_lock *blocker, 643static void locks_insert_block(struct file_lock *blocker,
599 struct file_lock *waiter) 644 struct file_lock *waiter)
600{ 645{
@@ -606,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker,
606/* 651/*
607 * Wake up processes blocked waiting for blocker. 652 * Wake up processes blocked waiting for blocker.
608 * 653 *
609 * Must be called with the inode->i_lock held! 654 * Must be called with the inode->flc_lock held!
610 */ 655 */
611static void locks_wake_up_blocks(struct file_lock *blocker) 656static void locks_wake_up_blocks(struct file_lock *blocker)
612{ 657{
613 /* 658 /*
614 * Avoid taking global lock if list is empty. This is safe since new 659 * Avoid taking global lock if list is empty. This is safe since new
615 * blocked requests are only added to the list under the i_lock, and 660 * blocked requests are only added to the list under the flc_lock, and
616 * the i_lock is always held here. Note that removal from the fl_block 661 * the flc_lock is always held here. Note that removal from the fl_block
617 * list does not require the i_lock, so we must recheck list_empty() 662 * list does not require the flc_lock, so we must recheck list_empty()
618 * after acquiring the blocked_lock_lock. 663 * after acquiring the blocked_lock_lock.
619 */ 664 */
620 if (list_empty(&blocker->fl_block)) 665 if (list_empty(&blocker->fl_block))
@@ -635,63 +680,36 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
635 spin_unlock(&blocked_lock_lock); 680 spin_unlock(&blocked_lock_lock);
636} 681}
637 682
638/* Insert file lock fl into an inode's lock list at the position indicated 683static void
639 * by pos. At the same time add the lock to the global file lock list. 684locks_insert_lock_ctx(struct file_lock *fl, int *counter,
640 * 685 struct list_head *before)
641 * Must be called with the i_lock held!
642 */
643static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
644{ 686{
645 fl->fl_nspid = get_pid(task_tgid(current)); 687 fl->fl_nspid = get_pid(task_tgid(current));
646 688 list_add_tail(&fl->fl_list, before);
647 /* insert into file's list */ 689 ++*counter;
648 fl->fl_next = *pos;
649 *pos = fl;
650
651 locks_insert_global_locks(fl); 690 locks_insert_global_locks(fl);
652} 691}
653 692
654/** 693static void
655 * locks_delete_lock - Delete a lock and then free it. 694locks_unlink_lock_ctx(struct file_lock *fl, int *counter)
656 * @thisfl_p: pointer that points to the fl_next field of the previous
657 * inode->i_flock list entry
658 *
659 * Unlink a lock from all lists and free the namespace reference, but don't
660 * free it yet. Wake up processes that are blocked waiting for this lock and
661 * notify the FS that the lock has been cleared.
662 *
663 * Must be called with the i_lock held!
664 */
665static void locks_unlink_lock(struct file_lock **thisfl_p)
666{ 695{
667 struct file_lock *fl = *thisfl_p;
668
669 locks_delete_global_locks(fl); 696 locks_delete_global_locks(fl);
670 697 list_del_init(&fl->fl_list);
671 *thisfl_p = fl->fl_next; 698 --*counter;
672 fl->fl_next = NULL;
673
674 if (fl->fl_nspid) { 699 if (fl->fl_nspid) {
675 put_pid(fl->fl_nspid); 700 put_pid(fl->fl_nspid);
676 fl->fl_nspid = NULL; 701 fl->fl_nspid = NULL;
677 } 702 }
678
679 locks_wake_up_blocks(fl); 703 locks_wake_up_blocks(fl);
680} 704}
681 705
682/* 706static void
683 * Unlink a lock from all lists and free it. 707locks_delete_lock_ctx(struct file_lock *fl, int *counter,
684 * 708 struct list_head *dispose)
685 * Must be called with i_lock held!
686 */
687static void locks_delete_lock(struct file_lock **thisfl_p,
688 struct list_head *dispose)
689{ 709{
690 struct file_lock *fl = *thisfl_p; 710 locks_unlink_lock_ctx(fl, counter);
691
692 locks_unlink_lock(thisfl_p);
693 if (dispose) 711 if (dispose)
694 list_add(&fl->fl_block, dispose); 712 list_add(&fl->fl_list, dispose);
695 else 713 else
696 locks_free_lock(fl); 714 locks_free_lock(fl);
697} 715}
@@ -746,22 +764,27 @@ void
746posix_test_lock(struct file *filp, struct file_lock *fl) 764posix_test_lock(struct file *filp, struct file_lock *fl)
747{ 765{
748 struct file_lock *cfl; 766 struct file_lock *cfl;
767 struct file_lock_context *ctx;
749 struct inode *inode = file_inode(filp); 768 struct inode *inode = file_inode(filp);
750 769
751 spin_lock(&inode->i_lock); 770 ctx = inode->i_flctx;
752 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { 771 if (!ctx || list_empty_careful(&ctx->flc_posix)) {
753 if (!IS_POSIX(cfl))
754 continue;
755 if (posix_locks_conflict(fl, cfl))
756 break;
757 }
758 if (cfl) {
759 locks_copy_conflock(fl, cfl);
760 if (cfl->fl_nspid)
761 fl->fl_pid = pid_vnr(cfl->fl_nspid);
762 } else
763 fl->fl_type = F_UNLCK; 772 fl->fl_type = F_UNLCK;
764 spin_unlock(&inode->i_lock); 773 return;
774 }
775
776 spin_lock(&ctx->flc_lock);
777 list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
778 if (posix_locks_conflict(fl, cfl)) {
779 locks_copy_conflock(fl, cfl);
780 if (cfl->fl_nspid)
781 fl->fl_pid = pid_vnr(cfl->fl_nspid);
782 goto out;
783 }
784 }
785 fl->fl_type = F_UNLCK;
786out:
787 spin_unlock(&ctx->flc_lock);
765 return; 788 return;
766} 789}
767EXPORT_SYMBOL(posix_test_lock); 790EXPORT_SYMBOL(posix_test_lock);
@@ -845,34 +868,34 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
845static int flock_lock_file(struct file *filp, struct file_lock *request) 868static int flock_lock_file(struct file *filp, struct file_lock *request)
846{ 869{
847 struct file_lock *new_fl = NULL; 870 struct file_lock *new_fl = NULL;
848 struct file_lock **before; 871 struct file_lock *fl;
849 struct inode * inode = file_inode(filp); 872 struct file_lock_context *ctx;
873 struct inode *inode = file_inode(filp);
850 int error = 0; 874 int error = 0;
851 int found = 0; 875 bool found = false;
852 LIST_HEAD(dispose); 876 LIST_HEAD(dispose);
853 877
878 ctx = locks_get_lock_context(inode);
879 if (!ctx)
880 return -ENOMEM;
881
854 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 882 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
855 new_fl = locks_alloc_lock(); 883 new_fl = locks_alloc_lock();
856 if (!new_fl) 884 if (!new_fl)
857 return -ENOMEM; 885 return -ENOMEM;
858 } 886 }
859 887
860 spin_lock(&inode->i_lock); 888 spin_lock(&ctx->flc_lock);
861 if (request->fl_flags & FL_ACCESS) 889 if (request->fl_flags & FL_ACCESS)
862 goto find_conflict; 890 goto find_conflict;
863 891
864 for_each_lock(inode, before) { 892 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
865 struct file_lock *fl = *before;
866 if (IS_POSIX(fl))
867 break;
868 if (IS_LEASE(fl))
869 continue;
870 if (filp != fl->fl_file) 893 if (filp != fl->fl_file)
871 continue; 894 continue;
872 if (request->fl_type == fl->fl_type) 895 if (request->fl_type == fl->fl_type)
873 goto out; 896 goto out;
874 found = 1; 897 found = true;
875 locks_delete_lock(before, &dispose); 898 locks_delete_lock_ctx(fl, &ctx->flc_flock_cnt, &dispose);
876 break; 899 break;
877 } 900 }
878 901
@@ -887,18 +910,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
887 * give it the opportunity to lock the file. 910 * give it the opportunity to lock the file.
888 */ 911 */
889 if (found) { 912 if (found) {
890 spin_unlock(&inode->i_lock); 913 spin_unlock(&ctx->flc_lock);
891 cond_resched(); 914 cond_resched();
892 spin_lock(&inode->i_lock); 915 spin_lock(&ctx->flc_lock);
893 } 916 }
894 917
895find_conflict: 918find_conflict:
896 for_each_lock(inode, before) { 919 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
897 struct file_lock *fl = *before;
898 if (IS_POSIX(fl))
899 break;
900 if (IS_LEASE(fl))
901 continue;
902 if (!flock_locks_conflict(request, fl)) 920 if (!flock_locks_conflict(request, fl))
903 continue; 921 continue;
904 error = -EAGAIN; 922 error = -EAGAIN;
@@ -911,12 +929,12 @@ find_conflict:
911 if (request->fl_flags & FL_ACCESS) 929 if (request->fl_flags & FL_ACCESS)
912 goto out; 930 goto out;
913 locks_copy_lock(new_fl, request); 931 locks_copy_lock(new_fl, request);
914 locks_insert_lock(before, new_fl); 932 locks_insert_lock_ctx(new_fl, &ctx->flc_flock_cnt, &ctx->flc_flock);
915 new_fl = NULL; 933 new_fl = NULL;
916 error = 0; 934 error = 0;
917 935
918out: 936out:
919 spin_unlock(&inode->i_lock); 937 spin_unlock(&ctx->flc_lock);
920 if (new_fl) 938 if (new_fl)
921 locks_free_lock(new_fl); 939 locks_free_lock(new_fl);
922 locks_dispose_list(&dispose); 940 locks_dispose_list(&dispose);
@@ -925,16 +943,20 @@ out:
925 943
926static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) 944static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
927{ 945{
928 struct file_lock *fl; 946 struct file_lock *fl, *tmp;
929 struct file_lock *new_fl = NULL; 947 struct file_lock *new_fl = NULL;
930 struct file_lock *new_fl2 = NULL; 948 struct file_lock *new_fl2 = NULL;
931 struct file_lock *left = NULL; 949 struct file_lock *left = NULL;
932 struct file_lock *right = NULL; 950 struct file_lock *right = NULL;
933 struct file_lock **before; 951 struct file_lock_context *ctx;
934 int error; 952 int error;
935 bool added = false; 953 bool added = false;
936 LIST_HEAD(dispose); 954 LIST_HEAD(dispose);
937 955
956 ctx = locks_get_lock_context(inode);
957 if (!ctx)
958 return -ENOMEM;
959
938 /* 960 /*
939 * We may need two file_lock structures for this operation, 961 * We may need two file_lock structures for this operation,
940 * so we get them in advance to avoid races. 962 * so we get them in advance to avoid races.
@@ -948,15 +970,14 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
948 new_fl2 = locks_alloc_lock(); 970 new_fl2 = locks_alloc_lock();
949 } 971 }
950 972
951 spin_lock(&inode->i_lock); 973 spin_lock(&ctx->flc_lock);
952 /* 974 /*
953 * New lock request. Walk all POSIX locks and look for conflicts. If 975 * New lock request. Walk all POSIX locks and look for conflicts. If
954 * there are any, either return error or put the request on the 976 * there are any, either return error or put the request on the
955 * blocker's list of waiters and the global blocked_hash. 977 * blocker's list of waiters and the global blocked_hash.
956 */ 978 */
957 if (request->fl_type != F_UNLCK) { 979 if (request->fl_type != F_UNLCK) {
958 for_each_lock(inode, before) { 980 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
959 fl = *before;
960 if (!IS_POSIX(fl)) 981 if (!IS_POSIX(fl))
961 continue; 982 continue;
962 if (!posix_locks_conflict(request, fl)) 983 if (!posix_locks_conflict(request, fl))
@@ -986,29 +1007,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
986 if (request->fl_flags & FL_ACCESS) 1007 if (request->fl_flags & FL_ACCESS)
987 goto out; 1008 goto out;
988 1009
989 /* 1010 /* Find the first old lock with the same owner as the new lock */
990 * Find the first old lock with the same owner as the new lock. 1011 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
991 */ 1012 if (posix_same_owner(request, fl))
992 1013 break;
993 before = &inode->i_flock;
994
995 /* First skip locks owned by other processes. */
996 while ((fl = *before) && (!IS_POSIX(fl) ||
997 !posix_same_owner(request, fl))) {
998 before = &fl->fl_next;
999 } 1014 }
1000 1015
1001 /* Process locks with this owner. */ 1016 /* Process locks with this owner. */
1002 while ((fl = *before) && posix_same_owner(request, fl)) { 1017 list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1003 /* Detect adjacent or overlapping regions (if same lock type) 1018 if (!posix_same_owner(request, fl))
1004 */ 1019 break;
1020
1021 /* Detect adjacent or overlapping regions (if same lock type) */
1005 if (request->fl_type == fl->fl_type) { 1022 if (request->fl_type == fl->fl_type) {
1006 /* In all comparisons of start vs end, use 1023 /* In all comparisons of start vs end, use
1007 * "start - 1" rather than "end + 1". If end 1024 * "start - 1" rather than "end + 1". If end
1008 * is OFFSET_MAX, end + 1 will become negative. 1025 * is OFFSET_MAX, end + 1 will become negative.
1009 */ 1026 */
1010 if (fl->fl_end < request->fl_start - 1) 1027 if (fl->fl_end < request->fl_start - 1)
1011 goto next_lock; 1028 continue;
1012 /* If the next lock in the list has entirely bigger 1029 /* If the next lock in the list has entirely bigger
1013 * addresses than the new one, insert the lock here. 1030 * addresses than the new one, insert the lock here.
1014 */ 1031 */
@@ -1029,18 +1046,18 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1029 else 1046 else
1030 request->fl_end = fl->fl_end; 1047 request->fl_end = fl->fl_end;
1031 if (added) { 1048 if (added) {
1032 locks_delete_lock(before, &dispose); 1049 locks_delete_lock_ctx(fl, &ctx->flc_posix_cnt,
1050 &dispose);
1033 continue; 1051 continue;
1034 } 1052 }
1035 request = fl; 1053 request = fl;
1036 added = true; 1054 added = true;
1037 } 1055 } else {
1038 else {
1039 /* Processing for different lock types is a bit 1056 /* Processing for different lock types is a bit
1040 * more complex. 1057 * more complex.
1041 */ 1058 */
1042 if (fl->fl_end < request->fl_start) 1059 if (fl->fl_end < request->fl_start)
1043 goto next_lock; 1060 continue;
1044 if (fl->fl_start > request->fl_end) 1061 if (fl->fl_start > request->fl_end)
1045 break; 1062 break;
1046 if (request->fl_type == F_UNLCK) 1063 if (request->fl_type == F_UNLCK)
@@ -1059,7 +1076,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1059 * one (This may happen several times). 1076 * one (This may happen several times).
1060 */ 1077 */
1061 if (added) { 1078 if (added) {
1062 locks_delete_lock(before, &dispose); 1079 locks_delete_lock_ctx(fl,
1080 &ctx->flc_posix_cnt, &dispose);
1063 continue; 1081 continue;
1064 } 1082 }
1065 /* 1083 /*
@@ -1075,15 +1093,13 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1075 locks_copy_lock(new_fl, request); 1093 locks_copy_lock(new_fl, request);
1076 request = new_fl; 1094 request = new_fl;
1077 new_fl = NULL; 1095 new_fl = NULL;
1078 locks_delete_lock(before, &dispose); 1096 locks_insert_lock_ctx(request,
1079 locks_insert_lock(before, request); 1097 &ctx->flc_posix_cnt, &fl->fl_list);
1098 locks_delete_lock_ctx(fl,
1099 &ctx->flc_posix_cnt, &dispose);
1080 added = true; 1100 added = true;
1081 } 1101 }
1082 } 1102 }
1083 /* Go on to next lock.
1084 */
1085 next_lock:
1086 before = &fl->fl_next;
1087 } 1103 }
1088 1104
1089 /* 1105 /*
@@ -1108,7 +1124,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1108 goto out; 1124 goto out;
1109 } 1125 }
1110 locks_copy_lock(new_fl, request); 1126 locks_copy_lock(new_fl, request);
1111 locks_insert_lock(before, new_fl); 1127 locks_insert_lock_ctx(new_fl, &ctx->flc_posix_cnt,
1128 &fl->fl_list);
1112 new_fl = NULL; 1129 new_fl = NULL;
1113 } 1130 }
1114 if (right) { 1131 if (right) {
@@ -1119,7 +1136,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1119 left = new_fl2; 1136 left = new_fl2;
1120 new_fl2 = NULL; 1137 new_fl2 = NULL;
1121 locks_copy_lock(left, right); 1138 locks_copy_lock(left, right);
1122 locks_insert_lock(before, left); 1139 locks_insert_lock_ctx(left, &ctx->flc_posix_cnt,
1140 &fl->fl_list);
1123 } 1141 }
1124 right->fl_start = request->fl_end + 1; 1142 right->fl_start = request->fl_end + 1;
1125 locks_wake_up_blocks(right); 1143 locks_wake_up_blocks(right);
@@ -1129,7 +1147,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1129 locks_wake_up_blocks(left); 1147 locks_wake_up_blocks(left);
1130 } 1148 }
1131 out: 1149 out:
1132 spin_unlock(&inode->i_lock); 1150 spin_unlock(&ctx->flc_lock);
1133 /* 1151 /*
1134 * Free any unused locks. 1152 * Free any unused locks.
1135 */ 1153 */
@@ -1199,22 +1217,29 @@ EXPORT_SYMBOL(posix_lock_file_wait);
1199 */ 1217 */
1200int locks_mandatory_locked(struct file *file) 1218int locks_mandatory_locked(struct file *file)
1201{ 1219{
1220 int ret;
1202 struct inode *inode = file_inode(file); 1221 struct inode *inode = file_inode(file);
1222 struct file_lock_context *ctx;
1203 struct file_lock *fl; 1223 struct file_lock *fl;
1204 1224
1225 ctx = inode->i_flctx;
1226 if (!ctx || list_empty_careful(&ctx->flc_posix))
1227 return 0;
1228
1205 /* 1229 /*
1206 * Search the lock list for this inode for any POSIX locks. 1230 * Search the lock list for this inode for any POSIX locks.
1207 */ 1231 */
1208 spin_lock(&inode->i_lock); 1232 spin_lock(&ctx->flc_lock);
1209 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1233 ret = 0;
1210 if (!IS_POSIX(fl)) 1234 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1211 continue;
1212 if (fl->fl_owner != current->files && 1235 if (fl->fl_owner != current->files &&
1213 fl->fl_owner != file) 1236 fl->fl_owner != file) {
1237 ret = -EAGAIN;
1214 break; 1238 break;
1239 }
1215 } 1240 }
1216 spin_unlock(&inode->i_lock); 1241 spin_unlock(&ctx->flc_lock);
1217 return fl ? -EAGAIN : 0; 1242 return ret;
1218} 1243}
1219 1244
1220/** 1245/**
@@ -1294,9 +1319,9 @@ static void lease_clear_pending(struct file_lock *fl, int arg)
1294} 1319}
1295 1320
1296/* We already had a lease on this file; just change its type */ 1321/* We already had a lease on this file; just change its type */
1297int lease_modify(struct file_lock **before, int arg, struct list_head *dispose) 1322int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1298{ 1323{
1299 struct file_lock *fl = *before; 1324 struct file_lock_context *flctx;
1300 int error = assign_type(fl, arg); 1325 int error = assign_type(fl, arg);
1301 1326
1302 if (error) 1327 if (error)
@@ -1306,6 +1331,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
1306 if (arg == F_UNLCK) { 1331 if (arg == F_UNLCK) {
1307 struct file *filp = fl->fl_file; 1332 struct file *filp = fl->fl_file;
1308 1333
1334 flctx = file_inode(filp)->i_flctx;
1309 f_delown(filp); 1335 f_delown(filp);
1310 filp->f_owner.signum = 0; 1336 filp->f_owner.signum = 0;
1311 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync); 1337 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
@@ -1313,7 +1339,7 @@ int lease_modify(struct file_lock **before, int arg, struct list_head *dispose)
1313 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 1339 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1314 fl->fl_fasync = NULL; 1340 fl->fl_fasync = NULL;
1315 } 1341 }
1316 locks_delete_lock(before, dispose); 1342 locks_delete_lock_ctx(fl, &flctx->flc_lease_cnt, dispose);
1317 } 1343 }
1318 return 0; 1344 return 0;
1319} 1345}
@@ -1329,25 +1355,24 @@ static bool past_time(unsigned long then)
1329 1355
1330static void time_out_leases(struct inode *inode, struct list_head *dispose) 1356static void time_out_leases(struct inode *inode, struct list_head *dispose)
1331{ 1357{
1332 struct file_lock **before; 1358 struct file_lock_context *ctx = inode->i_flctx;
1333 struct file_lock *fl; 1359 struct file_lock *fl, *tmp;
1334 1360
1335 lockdep_assert_held(&inode->i_lock); 1361 lockdep_assert_held(&ctx->flc_lock);
1336 1362
1337 before = &inode->i_flock; 1363 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1338 while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) {
1339 trace_time_out_leases(inode, fl); 1364 trace_time_out_leases(inode, fl);
1340 if (past_time(fl->fl_downgrade_time)) 1365 if (past_time(fl->fl_downgrade_time))
1341 lease_modify(before, F_RDLCK, dispose); 1366 lease_modify(fl, F_RDLCK, dispose);
1342 if (past_time(fl->fl_break_time)) 1367 if (past_time(fl->fl_break_time))
1343 lease_modify(before, F_UNLCK, dispose); 1368 lease_modify(fl, F_UNLCK, dispose);
1344 if (fl == *before) /* lease_modify may have freed fl */
1345 before = &fl->fl_next;
1346 } 1369 }
1347} 1370}
1348 1371
1349static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) 1372static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1350{ 1373{
1374 if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
1375 return false;
1351 if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) 1376 if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
1352 return false; 1377 return false;
1353 return locks_conflict(breaker, lease); 1378 return locks_conflict(breaker, lease);
@@ -1356,11 +1381,12 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1356static bool 1381static bool
1357any_leases_conflict(struct inode *inode, struct file_lock *breaker) 1382any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1358{ 1383{
1384 struct file_lock_context *ctx = inode->i_flctx;
1359 struct file_lock *fl; 1385 struct file_lock *fl;
1360 1386
1361 lockdep_assert_held(&inode->i_lock); 1387 lockdep_assert_held(&ctx->flc_lock);
1362 1388
1363 for (fl = inode->i_flock ; fl && IS_LEASE(fl); fl = fl->fl_next) { 1389 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1364 if (leases_conflict(fl, breaker)) 1390 if (leases_conflict(fl, breaker))
1365 return true; 1391 return true;
1366 } 1392 }
@@ -1384,7 +1410,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1384{ 1410{
1385 int error = 0; 1411 int error = 0;
1386 struct file_lock *new_fl; 1412 struct file_lock *new_fl;
1387 struct file_lock *fl, **before; 1413 struct file_lock_context *ctx = inode->i_flctx;
1414 struct file_lock *fl;
1388 unsigned long break_time; 1415 unsigned long break_time;
1389 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1416 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1390 LIST_HEAD(dispose); 1417 LIST_HEAD(dispose);
@@ -1394,7 +1421,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1394 return PTR_ERR(new_fl); 1421 return PTR_ERR(new_fl);
1395 new_fl->fl_flags = type; 1422 new_fl->fl_flags = type;
1396 1423
1397 spin_lock(&inode->i_lock); 1424 /* typically we will check that ctx is non-NULL before calling */
1425 if (!ctx) {
1426 WARN_ON_ONCE(1);
1427 return error;
1428 }
1429
1430 spin_lock(&ctx->flc_lock);
1398 1431
1399 time_out_leases(inode, &dispose); 1432 time_out_leases(inode, &dispose);
1400 1433
@@ -1408,9 +1441,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1408 break_time++; /* so that 0 means no break time */ 1441 break_time++; /* so that 0 means no break time */
1409 } 1442 }
1410 1443
1411 for (before = &inode->i_flock; 1444 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1412 ((fl = *before) != NULL) && IS_LEASE(fl);
1413 before = &fl->fl_next) {
1414 if (!leases_conflict(fl, new_fl)) 1445 if (!leases_conflict(fl, new_fl))
1415 continue; 1446 continue;
1416 if (want_write) { 1447 if (want_write) {
@@ -1419,17 +1450,17 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1419 fl->fl_flags |= FL_UNLOCK_PENDING; 1450 fl->fl_flags |= FL_UNLOCK_PENDING;
1420 fl->fl_break_time = break_time; 1451 fl->fl_break_time = break_time;
1421 } else { 1452 } else {
1422 if (lease_breaking(inode->i_flock)) 1453 if (lease_breaking(fl))
1423 continue; 1454 continue;
1424 fl->fl_flags |= FL_DOWNGRADE_PENDING; 1455 fl->fl_flags |= FL_DOWNGRADE_PENDING;
1425 fl->fl_downgrade_time = break_time; 1456 fl->fl_downgrade_time = break_time;
1426 } 1457 }
1427 if (fl->fl_lmops->lm_break(fl)) 1458 if (fl->fl_lmops->lm_break(fl))
1428 locks_delete_lock(before, &dispose); 1459 locks_delete_lock_ctx(fl, &ctx->flc_lease_cnt,
1460 &dispose);
1429 } 1461 }
1430 1462
1431 fl = inode->i_flock; 1463 if (list_empty(&ctx->flc_lease))
1432 if (!fl || !IS_LEASE(fl))
1433 goto out; 1464 goto out;
1434 1465
1435 if (mode & O_NONBLOCK) { 1466 if (mode & O_NONBLOCK) {
@@ -1439,18 +1470,19 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1439 } 1470 }
1440 1471
1441restart: 1472restart:
1442 break_time = inode->i_flock->fl_break_time; 1473 fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1474 break_time = fl->fl_break_time;
1443 if (break_time != 0) 1475 if (break_time != 0)
1444 break_time -= jiffies; 1476 break_time -= jiffies;
1445 if (break_time == 0) 1477 if (break_time == 0)
1446 break_time++; 1478 break_time++;
1447 locks_insert_block(inode->i_flock, new_fl); 1479 locks_insert_block(fl, new_fl);
1448 trace_break_lease_block(inode, new_fl); 1480 trace_break_lease_block(inode, new_fl);
1449 spin_unlock(&inode->i_lock); 1481 spin_unlock(&ctx->flc_lock);
1450 locks_dispose_list(&dispose); 1482 locks_dispose_list(&dispose);
1451 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1483 error = wait_event_interruptible_timeout(new_fl->fl_wait,
1452 !new_fl->fl_next, break_time); 1484 !new_fl->fl_next, break_time);
1453 spin_lock(&inode->i_lock); 1485 spin_lock(&ctx->flc_lock);
1454 trace_break_lease_unblock(inode, new_fl); 1486 trace_break_lease_unblock(inode, new_fl);
1455 locks_delete_block(new_fl); 1487 locks_delete_block(new_fl);
1456 if (error >= 0) { 1488 if (error >= 0) {
@@ -1462,12 +1494,10 @@ restart:
1462 time_out_leases(inode, &dispose); 1494 time_out_leases(inode, &dispose);
1463 if (any_leases_conflict(inode, new_fl)) 1495 if (any_leases_conflict(inode, new_fl))
1464 goto restart; 1496 goto restart;
1465
1466 error = 0; 1497 error = 0;
1467 } 1498 }
1468
1469out: 1499out:
1470 spin_unlock(&inode->i_lock); 1500 spin_unlock(&ctx->flc_lock);
1471 locks_dispose_list(&dispose); 1501 locks_dispose_list(&dispose);
1472 locks_free_lock(new_fl); 1502 locks_free_lock(new_fl);
1473 return error; 1503 return error;
@@ -1487,14 +1517,18 @@ EXPORT_SYMBOL(__break_lease);
1487void lease_get_mtime(struct inode *inode, struct timespec *time) 1517void lease_get_mtime(struct inode *inode, struct timespec *time)
1488{ 1518{
1489 bool has_lease = false; 1519 bool has_lease = false;
1490 struct file_lock *flock; 1520 struct file_lock_context *ctx = inode->i_flctx;
1521 struct file_lock *fl;
1491 1522
1492 if (inode->i_flock) { 1523 if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1493 spin_lock(&inode->i_lock); 1524 spin_lock(&ctx->flc_lock);
1494 flock = inode->i_flock; 1525 if (!list_empty(&ctx->flc_lease)) {
1495 if (flock && IS_LEASE(flock) && (flock->fl_type == F_WRLCK)) 1526 fl = list_first_entry(&ctx->flc_lease,
1496 has_lease = true; 1527 struct file_lock, fl_list);
1497 spin_unlock(&inode->i_lock); 1528 if (fl->fl_type == F_WRLCK)
1529 has_lease = true;
1530 }
1531 spin_unlock(&ctx->flc_lock);
1498 } 1532 }
1499 1533
1500 if (has_lease) 1534 if (has_lease)
@@ -1532,20 +1566,22 @@ int fcntl_getlease(struct file *filp)
1532{ 1566{
1533 struct file_lock *fl; 1567 struct file_lock *fl;
1534 struct inode *inode = file_inode(filp); 1568 struct inode *inode = file_inode(filp);
1569 struct file_lock_context *ctx = inode->i_flctx;
1535 int type = F_UNLCK; 1570 int type = F_UNLCK;
1536 LIST_HEAD(dispose); 1571 LIST_HEAD(dispose);
1537 1572
1538 spin_lock(&inode->i_lock); 1573 if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1539 time_out_leases(file_inode(filp), &dispose); 1574 spin_lock(&ctx->flc_lock);
1540 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); 1575 time_out_leases(file_inode(filp), &dispose);
1541 fl = fl->fl_next) { 1576 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1542 if (fl->fl_file == filp) { 1577 if (fl->fl_file != filp)
1578 continue;
1543 type = target_leasetype(fl); 1579 type = target_leasetype(fl);
1544 break; 1580 break;
1545 } 1581 }
1582 spin_unlock(&ctx->flc_lock);
1583 locks_dispose_list(&dispose);
1546 } 1584 }
1547 spin_unlock(&inode->i_lock);
1548 locks_dispose_list(&dispose);
1549 return type; 1585 return type;
1550} 1586}
1551 1587
@@ -1560,11 +1596,14 @@ int fcntl_getlease(struct file *filp)
1560 * conflict with the lease we're trying to set. 1596 * conflict with the lease we're trying to set.
1561 */ 1597 */
1562static int 1598static int
1563check_conflicting_open(const struct dentry *dentry, const long arg) 1599check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
1564{ 1600{
1565 int ret = 0; 1601 int ret = 0;
1566 struct inode *inode = dentry->d_inode; 1602 struct inode *inode = dentry->d_inode;
1567 1603
1604 if (flags & FL_LAYOUT)
1605 return 0;
1606
1568 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1607 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1569 return -EAGAIN; 1608 return -EAGAIN;
1570 1609
@@ -1578,9 +1617,10 @@ check_conflicting_open(const struct dentry *dentry, const long arg)
1578static int 1617static int
1579generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) 1618generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
1580{ 1619{
1581 struct file_lock *fl, **before, **my_before = NULL, *lease; 1620 struct file_lock *fl, *my_fl = NULL, *lease;
1582 struct dentry *dentry = filp->f_path.dentry; 1621 struct dentry *dentry = filp->f_path.dentry;
1583 struct inode *inode = dentry->d_inode; 1622 struct inode *inode = dentry->d_inode;
1623 struct file_lock_context *ctx;
1584 bool is_deleg = (*flp)->fl_flags & FL_DELEG; 1624 bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1585 int error; 1625 int error;
1586 LIST_HEAD(dispose); 1626 LIST_HEAD(dispose);
@@ -1588,6 +1628,10 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1588 lease = *flp; 1628 lease = *flp;
1589 trace_generic_add_lease(inode, lease); 1629 trace_generic_add_lease(inode, lease);
1590 1630
1631 ctx = locks_get_lock_context(inode);
1632 if (!ctx)
1633 return -ENOMEM;
1634
1591 /* 1635 /*
1592 * In the delegation case we need mutual exclusion with 1636 * In the delegation case we need mutual exclusion with
1593 * a number of operations that take the i_mutex. We trylock 1637 * a number of operations that take the i_mutex. We trylock
@@ -1606,9 +1650,9 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1606 return -EINVAL; 1650 return -EINVAL;
1607 } 1651 }
1608 1652
1609 spin_lock(&inode->i_lock); 1653 spin_lock(&ctx->flc_lock);
1610 time_out_leases(inode, &dispose); 1654 time_out_leases(inode, &dispose);
1611 error = check_conflicting_open(dentry, arg); 1655 error = check_conflicting_open(dentry, arg, lease->fl_flags);
1612 if (error) 1656 if (error)
1613 goto out; 1657 goto out;
1614 1658
@@ -1621,13 +1665,13 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1621 * except for this filp. 1665 * except for this filp.
1622 */ 1666 */
1623 error = -EAGAIN; 1667 error = -EAGAIN;
1624 for (before = &inode->i_flock; 1668 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1625 ((fl = *before) != NULL) && IS_LEASE(fl); 1669 if (fl->fl_file == filp &&
1626 before = &fl->fl_next) { 1670 fl->fl_owner == lease->fl_owner) {
1627 if (fl->fl_file == filp) { 1671 my_fl = fl;
1628 my_before = before;
1629 continue; 1672 continue;
1630 } 1673 }
1674
1631 /* 1675 /*
1632 * No exclusive leases if someone else has a lease on 1676 * No exclusive leases if someone else has a lease on
1633 * this file: 1677 * this file:
@@ -1642,9 +1686,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1642 goto out; 1686 goto out;
1643 } 1687 }
1644 1688
1645 if (my_before != NULL) { 1689 if (my_fl != NULL) {
1646 lease = *my_before; 1690 error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
1647 error = lease->fl_lmops->lm_change(my_before, arg, &dispose);
1648 if (error) 1691 if (error)
1649 goto out; 1692 goto out;
1650 goto out_setup; 1693 goto out_setup;
@@ -1654,7 +1697,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1654 if (!leases_enable) 1697 if (!leases_enable)
1655 goto out; 1698 goto out;
1656 1699
1657 locks_insert_lock(before, lease); 1700 locks_insert_lock_ctx(lease, &ctx->flc_lease_cnt, &ctx->flc_lease);
1658 /* 1701 /*
1659 * The check in break_lease() is lockless. It's possible for another 1702 * The check in break_lease() is lockless. It's possible for another
1660 * open to race in after we did the earlier check for a conflicting 1703 * open to race in after we did the earlier check for a conflicting
@@ -1665,46 +1708,51 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1665 * precedes these checks. 1708 * precedes these checks.
1666 */ 1709 */
1667 smp_mb(); 1710 smp_mb();
1668 error = check_conflicting_open(dentry, arg); 1711 error = check_conflicting_open(dentry, arg, lease->fl_flags);
1669 if (error) 1712 if (error) {
1670 goto out_unlink; 1713 locks_unlink_lock_ctx(lease, &ctx->flc_lease_cnt);
1714 goto out;
1715 }
1671 1716
1672out_setup: 1717out_setup:
1673 if (lease->fl_lmops->lm_setup) 1718 if (lease->fl_lmops->lm_setup)
1674 lease->fl_lmops->lm_setup(lease, priv); 1719 lease->fl_lmops->lm_setup(lease, priv);
1675out: 1720out:
1676 spin_unlock(&inode->i_lock); 1721 spin_unlock(&ctx->flc_lock);
1677 locks_dispose_list(&dispose); 1722 locks_dispose_list(&dispose);
1678 if (is_deleg) 1723 if (is_deleg)
1679 mutex_unlock(&inode->i_mutex); 1724 mutex_unlock(&inode->i_mutex);
1680 if (!error && !my_before) 1725 if (!error && !my_fl)
1681 *flp = NULL; 1726 *flp = NULL;
1682 return error; 1727 return error;
1683out_unlink:
1684 locks_unlink_lock(before);
1685 goto out;
1686} 1728}
1687 1729
1688static int generic_delete_lease(struct file *filp) 1730static int generic_delete_lease(struct file *filp, void *owner)
1689{ 1731{
1690 int error = -EAGAIN; 1732 int error = -EAGAIN;
1691 struct file_lock *fl, **before; 1733 struct file_lock *fl, *victim = NULL;
1692 struct dentry *dentry = filp->f_path.dentry; 1734 struct dentry *dentry = filp->f_path.dentry;
1693 struct inode *inode = dentry->d_inode; 1735 struct inode *inode = dentry->d_inode;
1736 struct file_lock_context *ctx = inode->i_flctx;
1694 LIST_HEAD(dispose); 1737 LIST_HEAD(dispose);
1695 1738
1696 spin_lock(&inode->i_lock); 1739 if (!ctx) {
1697 time_out_leases(inode, &dispose); 1740 trace_generic_delete_lease(inode, NULL);
1698 for (before = &inode->i_flock; 1741 return error;
1699 ((fl = *before) != NULL) && IS_LEASE(fl); 1742 }
1700 before = &fl->fl_next) { 1743
1701 if (fl->fl_file == filp) 1744 spin_lock(&ctx->flc_lock);
1745 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1746 if (fl->fl_file == filp &&
1747 fl->fl_owner == owner) {
1748 victim = fl;
1702 break; 1749 break;
1750 }
1703 } 1751 }
1704 trace_generic_delete_lease(inode, fl); 1752 trace_generic_delete_lease(inode, fl);
1705 if (fl && IS_LEASE(fl)) 1753 if (victim)
1706 error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose); 1754 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1707 spin_unlock(&inode->i_lock); 1755 spin_unlock(&ctx->flc_lock);
1708 locks_dispose_list(&dispose); 1756 locks_dispose_list(&dispose);
1709 return error; 1757 return error;
1710} 1758}
@@ -1737,13 +1785,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
1737 1785
1738 switch (arg) { 1786 switch (arg) {
1739 case F_UNLCK: 1787 case F_UNLCK:
1740 return generic_delete_lease(filp); 1788 return generic_delete_lease(filp, *priv);
1741 case F_RDLCK: 1789 case F_RDLCK:
1742 case F_WRLCK: 1790 case F_WRLCK:
1743 if (!(*flp)->fl_lmops->lm_break) { 1791 if (!(*flp)->fl_lmops->lm_break) {
1744 WARN_ON_ONCE(1); 1792 WARN_ON_ONCE(1);
1745 return -ENOLCK; 1793 return -ENOLCK;
1746 } 1794 }
1795
1747 return generic_add_lease(filp, arg, flp, priv); 1796 return generic_add_lease(filp, arg, flp, priv);
1748 default: 1797 default:
1749 return -EINVAL; 1798 return -EINVAL;
@@ -1816,7 +1865,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1816int fcntl_setlease(unsigned int fd, struct file *filp, long arg) 1865int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1817{ 1866{
1818 if (arg == F_UNLCK) 1867 if (arg == F_UNLCK)
1819 return vfs_setlease(filp, F_UNLCK, NULL, NULL); 1868 return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
1820 return do_fcntl_add_lease(fd, filp, arg); 1869 return do_fcntl_add_lease(fd, filp, arg);
1821} 1870}
1822 1871
@@ -2171,7 +2220,7 @@ again:
2171 */ 2220 */
2172 /* 2221 /*
2173 * we need that spin_lock here - it prevents reordering between 2222 * we need that spin_lock here - it prevents reordering between
2174 * update of inode->i_flock and check for it done in close(). 2223 * update of i_flctx->flc_posix and check for it done in close().
2175 * rcu_read_lock() wouldn't do. 2224 * rcu_read_lock() wouldn't do.
2176 */ 2225 */
2177 spin_lock(&current->files->file_lock); 2226 spin_lock(&current->files->file_lock);
@@ -2331,13 +2380,14 @@ out:
2331void locks_remove_posix(struct file *filp, fl_owner_t owner) 2380void locks_remove_posix(struct file *filp, fl_owner_t owner)
2332{ 2381{
2333 struct file_lock lock; 2382 struct file_lock lock;
2383 struct file_lock_context *ctx = file_inode(filp)->i_flctx;
2334 2384
2335 /* 2385 /*
2336 * If there are no locks held on this file, we don't need to call 2386 * If there are no locks held on this file, we don't need to call
2337 * posix_lock_file(). Another process could be setting a lock on this 2387 * posix_lock_file(). Another process could be setting a lock on this
2338 * file at the same time, but we wouldn't remove that lock anyway. 2388 * file at the same time, but we wouldn't remove that lock anyway.
2339 */ 2389 */
2340 if (!file_inode(filp)->i_flock) 2390 if (!ctx || list_empty(&ctx->flc_posix))
2341 return; 2391 return;
2342 2392
2343 lock.fl_type = F_UNLCK; 2393 lock.fl_type = F_UNLCK;
@@ -2358,67 +2408,67 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
2358 2408
2359EXPORT_SYMBOL(locks_remove_posix); 2409EXPORT_SYMBOL(locks_remove_posix);
2360 2410
2411/* The i_flctx must be valid when calling into here */
2412static void
2413locks_remove_flock(struct file *filp)
2414{
2415 struct file_lock fl = {
2416 .fl_owner = filp,
2417 .fl_pid = current->tgid,
2418 .fl_file = filp,
2419 .fl_flags = FL_FLOCK,
2420 .fl_type = F_UNLCK,
2421 .fl_end = OFFSET_MAX,
2422 };
2423 struct file_lock_context *flctx = file_inode(filp)->i_flctx;
2424
2425 if (list_empty(&flctx->flc_flock))
2426 return;
2427
2428 if (filp->f_op->flock)
2429 filp->f_op->flock(filp, F_SETLKW, &fl);
2430 else
2431 flock_lock_file(filp, &fl);
2432
2433 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2434 fl.fl_ops->fl_release_private(&fl);
2435}
2436
2437/* The i_flctx must be valid when calling into here */
2438static void
2439locks_remove_lease(struct file *filp)
2440{
2441 struct inode *inode = file_inode(filp);
2442 struct file_lock_context *ctx = inode->i_flctx;
2443 struct file_lock *fl, *tmp;
2444 LIST_HEAD(dispose);
2445
2446 if (list_empty(&ctx->flc_lease))
2447 return;
2448
2449 spin_lock(&ctx->flc_lock);
2450 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2451 lease_modify(fl, F_UNLCK, &dispose);
2452 spin_unlock(&ctx->flc_lock);
2453 locks_dispose_list(&dispose);
2454}
2455
2361/* 2456/*
2362 * This function is called on the last close of an open file. 2457 * This function is called on the last close of an open file.
2363 */ 2458 */
2364void locks_remove_file(struct file *filp) 2459void locks_remove_file(struct file *filp)
2365{ 2460{
2366 struct inode * inode = file_inode(filp); 2461 if (!file_inode(filp)->i_flctx)
2367 struct file_lock *fl;
2368 struct file_lock **before;
2369 LIST_HEAD(dispose);
2370
2371 if (!inode->i_flock)
2372 return; 2462 return;
2373 2463
2464 /* remove any OFD locks */
2374 locks_remove_posix(filp, filp); 2465 locks_remove_posix(filp, filp);
2375 2466
2376 if (filp->f_op->flock) { 2467 /* remove flock locks */
2377 struct file_lock fl = { 2468 locks_remove_flock(filp);
2378 .fl_owner = filp,
2379 .fl_pid = current->tgid,
2380 .fl_file = filp,
2381 .fl_flags = FL_FLOCK,
2382 .fl_type = F_UNLCK,
2383 .fl_end = OFFSET_MAX,
2384 };
2385 filp->f_op->flock(filp, F_SETLKW, &fl);
2386 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2387 fl.fl_ops->fl_release_private(&fl);
2388 }
2389
2390 spin_lock(&inode->i_lock);
2391 before = &inode->i_flock;
2392
2393 while ((fl = *before) != NULL) {
2394 if (fl->fl_file == filp) {
2395 if (IS_LEASE(fl)) {
2396 lease_modify(before, F_UNLCK, &dispose);
2397 continue;
2398 }
2399
2400 /*
2401 * There's a leftover lock on the list of a type that
2402 * we didn't expect to see. Most likely a classic
2403 * POSIX lock that ended up not getting released
2404 * properly, or that raced onto the list somehow. Log
2405 * some info about it and then just remove it from
2406 * the list.
2407 */
2408 WARN(!IS_FLOCK(fl),
2409 "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n",
2410 MAJOR(inode->i_sb->s_dev),
2411 MINOR(inode->i_sb->s_dev), inode->i_ino,
2412 fl->fl_type, fl->fl_flags,
2413 fl->fl_start, fl->fl_end);
2414 2469
2415 locks_delete_lock(before, &dispose); 2470 /* remove any leases */
2416 continue; 2471 locks_remove_lease(filp);
2417 }
2418 before = &fl->fl_next;
2419 }
2420 spin_unlock(&inode->i_lock);
2421 locks_dispose_list(&dispose);
2422} 2472}
2423 2473
2424/** 2474/**
@@ -2621,6 +2671,9 @@ static int __init filelock_init(void)
2621{ 2671{
2622 int i; 2672 int i;
2623 2673
2674 flctx_cache = kmem_cache_create("file_lock_ctx",
2675 sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2676
2624 filelock_cache = kmem_cache_create("file_lock_cache", 2677 filelock_cache = kmem_cache_create("file_lock_cache",
2625 sizeof(struct file_lock), 0, SLAB_PANIC, NULL); 2678 sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2626 2679
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index e31e589369a4..01a9e16e9782 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -267,7 +267,6 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
267 if (inode) { 267 if (inode) {
268 atomic_set(&NCP_FINFO(inode)->opened, info->opened); 268 atomic_set(&NCP_FINFO(inode)->opened, info->opened);
269 269
270 inode->i_mapping->backing_dev_info = sb->s_bdi;
271 inode->i_ino = info->ino; 270 inode->i_ino = info->ino;
272 ncp_set_attr(inode, info); 271 ncp_set_attr(inode, info);
273 if (S_ISREG(inode->i_mode)) { 272 if (S_ISREG(inode->i_mode)) {
@@ -560,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
560 server = NCP_SBP(sb); 559 server = NCP_SBP(sb);
561 memset(server, 0, sizeof(*server)); 560 memset(server, 0, sizeof(*server));
562 561
563 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); 562 error = bdi_setup_and_register(&server->bdi, "ncpfs");
564 if (error) 563 if (error)
565 goto out_fput; 564 goto out_fput;
566 565
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4464eb06b0b6..a1f0685b42ff 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -85,25 +85,30 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
85{ 85{
86 struct inode *inode = state->inode; 86 struct inode *inode = state->inode;
87 struct file_lock *fl; 87 struct file_lock *fl;
88 struct file_lock_context *flctx = inode->i_flctx;
89 struct list_head *list;
88 int status = 0; 90 int status = 0;
89 91
90 if (inode->i_flock == NULL) 92 if (flctx == NULL)
91 goto out; 93 goto out;
92 94
93 /* Protect inode->i_flock using the i_lock */ 95 list = &flctx->flc_posix;
94 spin_lock(&inode->i_lock); 96 spin_lock(&flctx->flc_lock);
95 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 97restart:
96 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 98 list_for_each_entry(fl, list, fl_list) {
97 continue;
98 if (nfs_file_open_context(fl->fl_file) != ctx) 99 if (nfs_file_open_context(fl->fl_file) != ctx)
99 continue; 100 continue;
100 spin_unlock(&inode->i_lock); 101 spin_unlock(&flctx->flc_lock);
101 status = nfs4_lock_delegation_recall(fl, state, stateid); 102 status = nfs4_lock_delegation_recall(fl, state, stateid);
102 if (status < 0) 103 if (status < 0)
103 goto out; 104 goto out;
104 spin_lock(&inode->i_lock); 105 spin_lock(&flctx->flc_lock);
105 } 106 }
106 spin_unlock(&inode->i_lock); 107 if (list == &flctx->flc_posix) {
108 list = &flctx->flc_flock;
109 goto restart;
110 }
111 spin_unlock(&flctx->flc_lock);
107out: 112out:
108 return status; 113 return status;
109} 114}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2ab6f00dba5b..94712fc781fa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -646,7 +646,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
646 .fault = filemap_fault, 646 .fault = filemap_fault,
647 .map_pages = filemap_map_pages, 647 .map_pages = filemap_map_pages,
648 .page_mkwrite = nfs_vm_page_mkwrite, 648 .page_mkwrite = nfs_vm_page_mkwrite,
649 .remap_pages = generic_file_remap_pages,
650}; 649};
651 650
652static int nfs_need_sync_write(struct file *filp, struct inode *inode) 651static int nfs_need_sync_write(struct file *filp, struct inode *inode)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 3c9769441f36..91e88a7ecef0 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -960,52 +960,19 @@ filelayout_mark_request_commit(struct nfs_page *req,
960{ 960{
961 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 961 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
962 u32 i, j; 962 u32 i, j;
963 struct list_head *list;
964 struct pnfs_commit_bucket *buckets;
965 963
966 if (fl->commit_through_mds) { 964 if (fl->commit_through_mds) {
967 list = &cinfo->mds->list; 965 nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
968 spin_lock(cinfo->lock); 966 } else {
969 goto mds_commit; 967 /* Note that we are calling nfs4_fl_calc_j_index on each page
970 } 968 * that ends up being committed to a data server. An attractive
971 969 * alternative is to add a field to nfs_write_data and nfs_page
972 /* Note that we are calling nfs4_fl_calc_j_index on each page 970 * to store the value calculated in filelayout_write_pagelist
973 * that ends up being committed to a data server. An attractive 971 * and just use that here.
974 * alternative is to add a field to nfs_write_data and nfs_page
975 * to store the value calculated in filelayout_write_pagelist
976 * and just use that here.
977 */
978 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
979 i = select_bucket_index(fl, j);
980 spin_lock(cinfo->lock);
981 buckets = cinfo->ds->buckets;
982 list = &buckets[i].written;
983 if (list_empty(list)) {
984 /* Non-empty buckets hold a reference on the lseg. That ref
985 * is normally transferred to the COMMIT call and released
986 * there. It could also be released if the last req is pulled
987 * off due to a rewrite, in which case it will be done in
988 * pnfs_generic_clear_request_commit
989 */ 972 */
990 buckets[i].wlseg = pnfs_get_lseg(lseg); 973 j = nfs4_fl_calc_j_index(lseg, req_offset(req));
991 } 974 i = select_bucket_index(fl, j);
992 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 975 pnfs_layout_mark_request_commit(req, lseg, cinfo, i);
993 cinfo->ds->nwritten++;
994
995mds_commit:
996 /* nfs_request_add_commit_list(). We need to add req to list without
997 * dropping cinfo lock.
998 */
999 set_bit(PG_CLEAN, &(req)->wb_flags);
1000 nfs_list_add_request(req, list);
1001 cinfo->mds->ncommit++;
1002 spin_unlock(cinfo->lock);
1003 if (!cinfo->dreq) {
1004 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1005 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1006 BDI_RECLAIMABLE);
1007 __mark_inode_dirty(req->wb_context->dentry->d_inode,
1008 I_DIRTY_DATASYNC);
1009 } 976 }
1010} 977}
1011 978
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f29fb7d7e8f8..315cc68945b9 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1332,47 +1332,6 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
1332 return PNFS_ATTEMPTED; 1332 return PNFS_ATTEMPTED;
1333} 1333}
1334 1334
1335static void
1336ff_layout_mark_request_commit(struct nfs_page *req,
1337 struct pnfs_layout_segment *lseg,
1338 struct nfs_commit_info *cinfo,
1339 u32 ds_commit_idx)
1340{
1341 struct list_head *list;
1342 struct pnfs_commit_bucket *buckets;
1343
1344 spin_lock(cinfo->lock);
1345 buckets = cinfo->ds->buckets;
1346 list = &buckets[ds_commit_idx].written;
1347 if (list_empty(list)) {
1348 /* Non-empty buckets hold a reference on the lseg. That ref
1349 * is normally transferred to the COMMIT call and released
1350 * there. It could also be released if the last req is pulled
1351 * off due to a rewrite, in which case it will be done in
1352 * pnfs_common_clear_request_commit
1353 */
1354 WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
1355 buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
1356 }
1357 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1358 cinfo->ds->nwritten++;
1359
1360 /* nfs_request_add_commit_list(). We need to add req to list without
1361 * dropping cinfo lock.
1362 */
1363 set_bit(PG_CLEAN, &(req)->wb_flags);
1364 nfs_list_add_request(req, list);
1365 cinfo->mds->ncommit++;
1366 spin_unlock(cinfo->lock);
1367 if (!cinfo->dreq) {
1368 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1369 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1370 BDI_RECLAIMABLE);
1371 __mark_inode_dirty(req->wb_context->dentry->d_inode,
1372 I_DIRTY_DATASYNC);
1373 }
1374}
1375
1376static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1335static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
1377{ 1336{
1378 return i; 1337 return i;
@@ -1540,7 +1499,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
1540 .pg_write_ops = &ff_layout_pg_write_ops, 1499 .pg_write_ops = &ff_layout_pg_write_ops,
1541 .get_ds_info = ff_layout_get_ds_info, 1500 .get_ds_info = ff_layout_get_ds_info,
1542 .free_deviceid_node = ff_layout_free_deveiceid_node, 1501 .free_deviceid_node = ff_layout_free_deveiceid_node,
1543 .mark_request_commit = ff_layout_mark_request_commit, 1502 .mark_request_commit = pnfs_layout_mark_request_commit,
1544 .clear_request_commit = pnfs_generic_clear_request_commit, 1503 .clear_request_commit = pnfs_generic_clear_request_commit,
1545 .scan_commit_lists = pnfs_generic_scan_commit_lists, 1504 .scan_commit_lists = pnfs_generic_scan_commit_lists,
1546 .recover_commit_reqs = pnfs_generic_recover_commit_reqs, 1505 .recover_commit_reqs = pnfs_generic_recover_commit_reqs,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e211f975a69a..83107be3dd01 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -388,7 +388,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
388 if (S_ISREG(inode->i_mode)) { 388 if (S_ISREG(inode->i_mode)) {
389 inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; 389 inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
390 inode->i_data.a_ops = &nfs_file_aops; 390 inode->i_data.a_ops = &nfs_file_aops;
391 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
392 } else if (S_ISDIR(inode->i_mode)) { 391 } else if (S_ISDIR(inode->i_mode)) {
393 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 392 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
394 inode->i_fop = &nfs_dir_operations; 393 inode->i_fop = &nfs_dir_operations;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 21469e6e3834..b802fb3a2d99 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -430,7 +430,6 @@ int nfs_show_options(struct seq_file *, struct dentry *);
430int nfs_show_devname(struct seq_file *, struct dentry *); 430int nfs_show_devname(struct seq_file *, struct dentry *);
431int nfs_show_path(struct seq_file *, struct dentry *); 431int nfs_show_path(struct seq_file *, struct dentry *);
432int nfs_show_stats(struct seq_file *, struct dentry *); 432int nfs_show_stats(struct seq_file *, struct dentry *);
433void nfs_put_super(struct super_block *);
434int nfs_remount(struct super_block *sb, int *flags, char *raw_data); 433int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
435 434
436/* write.c */ 435/* write.c */
@@ -599,6 +598,19 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
599} 598}
600 599
601/* 600/*
601 * Record the page as unstable and mark its inode as dirty.
602 */
603static inline
604void nfs_mark_page_unstable(struct page *page)
605{
606 struct inode *inode = page_file_mapping(page)->host;
607
608 inc_zone_page_state(page, NR_UNSTABLE_NFS);
609 inc_bdi_stat(inode_to_bdi(inode), BDI_RECLAIMABLE);
610 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
611}
612
613/*
602 * Determine the number of bytes of data the page contains 614 * Determine the number of bytes of data the page contains
603 */ 615 */
604static inline 616static inline
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 590f096fd011..5ad908e9ce9c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1373,49 +1373,55 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
1373 struct nfs_inode *nfsi = NFS_I(inode); 1373 struct nfs_inode *nfsi = NFS_I(inode);
1374 struct file_lock *fl; 1374 struct file_lock *fl;
1375 int status = 0; 1375 int status = 0;
1376 struct file_lock_context *flctx = inode->i_flctx;
1377 struct list_head *list;
1376 1378
1377 if (inode->i_flock == NULL) 1379 if (flctx == NULL)
1378 return 0; 1380 return 0;
1379 1381
1382 list = &flctx->flc_posix;
1383
1380 /* Guard against delegation returns and new lock/unlock calls */ 1384 /* Guard against delegation returns and new lock/unlock calls */
1381 down_write(&nfsi->rwsem); 1385 down_write(&nfsi->rwsem);
1382 /* Protect inode->i_flock using the BKL */ 1386 spin_lock(&flctx->flc_lock);
1383 spin_lock(&inode->i_lock); 1387restart:
1384 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 1388 list_for_each_entry(fl, list, fl_list) {
1385 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
1386 continue;
1387 if (nfs_file_open_context(fl->fl_file)->state != state) 1389 if (nfs_file_open_context(fl->fl_file)->state != state)
1388 continue; 1390 continue;
1389 spin_unlock(&inode->i_lock); 1391 spin_unlock(&flctx->flc_lock);
1390 status = ops->recover_lock(state, fl); 1392 status = ops->recover_lock(state, fl);
1391 switch (status) { 1393 switch (status) {
1392 case 0: 1394 case 0:
1393 break; 1395 break;
1394 case -ESTALE: 1396 case -ESTALE:
1395 case -NFS4ERR_ADMIN_REVOKED: 1397 case -NFS4ERR_ADMIN_REVOKED:
1396 case -NFS4ERR_STALE_STATEID: 1398 case -NFS4ERR_STALE_STATEID:
1397 case -NFS4ERR_BAD_STATEID: 1399 case -NFS4ERR_BAD_STATEID:
1398 case -NFS4ERR_EXPIRED: 1400 case -NFS4ERR_EXPIRED:
1399 case -NFS4ERR_NO_GRACE: 1401 case -NFS4ERR_NO_GRACE:
1400 case -NFS4ERR_STALE_CLIENTID: 1402 case -NFS4ERR_STALE_CLIENTID:
1401 case -NFS4ERR_BADSESSION: 1403 case -NFS4ERR_BADSESSION:
1402 case -NFS4ERR_BADSLOT: 1404 case -NFS4ERR_BADSLOT:
1403 case -NFS4ERR_BAD_HIGH_SLOT: 1405 case -NFS4ERR_BAD_HIGH_SLOT:
1404 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1406 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1405 goto out; 1407 goto out;
1406 default: 1408 default:
1407 printk(KERN_ERR "NFS: %s: unhandled error %d\n", 1409 pr_err("NFS: %s: unhandled error %d\n",
1408 __func__, status); 1410 __func__, status);
1409 case -ENOMEM: 1411 case -ENOMEM:
1410 case -NFS4ERR_DENIED: 1412 case -NFS4ERR_DENIED:
1411 case -NFS4ERR_RECLAIM_BAD: 1413 case -NFS4ERR_RECLAIM_BAD:
1412 case -NFS4ERR_RECLAIM_CONFLICT: 1414 case -NFS4ERR_RECLAIM_CONFLICT:
1413 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 1415 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1414 status = 0; 1416 status = 0;
1415 } 1417 }
1416 spin_lock(&inode->i_lock); 1418 spin_lock(&flctx->flc_lock);
1417 } 1419 }
1418 spin_unlock(&inode->i_lock); 1420 if (list == &flctx->flc_posix) {
1421 list = &flctx->flc_flock;
1422 goto restart;
1423 }
1424 spin_unlock(&flctx->flc_lock);
1419out: 1425out:
1420 up_write(&nfsi->rwsem); 1426 up_write(&nfsi->rwsem);
1421 return status; 1427 return status;
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 48cea3c30e5d..75090feeafad 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -53,7 +53,6 @@ static const struct super_operations nfs4_sops = {
53 .destroy_inode = nfs_destroy_inode, 53 .destroy_inode = nfs_destroy_inode,
54 .write_inode = nfs4_write_inode, 54 .write_inode = nfs4_write_inode,
55 .drop_inode = nfs_drop_inode, 55 .drop_inode = nfs_drop_inode,
56 .put_super = nfs_put_super,
57 .statfs = nfs_statfs, 56 .statfs = nfs_statfs,
58 .evict_inode = nfs4_evict_inode, 57 .evict_inode = nfs4_evict_inode,
59 .umount_begin = nfs_umount_begin, 58 .umount_begin = nfs_umount_begin,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 960c99f75d3f..d57190a0d533 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -933,11 +933,15 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
933 struct nfs_pageio_descriptor *pgio) 933 struct nfs_pageio_descriptor *pgio)
934{ 934{
935 size_t size; 935 size_t size;
936 struct file_lock_context *flctx;
936 937
937 if (prev) { 938 if (prev) {
938 if (!nfs_match_open_context(req->wb_context, prev->wb_context)) 939 if (!nfs_match_open_context(req->wb_context, prev->wb_context))
939 return false; 940 return false;
940 if (req->wb_context->dentry->d_inode->i_flock != NULL && 941 flctx = req->wb_context->dentry->d_inode->i_flctx;
942 if (flctx != NULL &&
943 !(list_empty_careful(&flctx->flc_posix) &&
944 list_empty_careful(&flctx->flc_flock)) &&
941 !nfs_match_lock_context(req->wb_lock_context, 945 !nfs_match_lock_context(req->wb_lock_context,
942 prev->wb_lock_context)) 946 prev->wb_lock_context))
943 return false; 947 return false;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 797cd6253adf..635f0865671c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -344,6 +344,10 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
344struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, 344struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net,
345 struct xdr_stream *xdr, 345 struct xdr_stream *xdr,
346 gfp_t gfp_flags); 346 gfp_t gfp_flags);
347void pnfs_layout_mark_request_commit(struct nfs_page *req,
348 struct pnfs_layout_segment *lseg,
349 struct nfs_commit_info *cinfo,
350 u32 ds_commit_idx);
347 351
348static inline bool nfs_have_layout(struct inode *inode) 352static inline bool nfs_have_layout(struct inode *inode)
349{ 353{
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index fdc4f6562bb7..54e36b38fb5f 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -838,3 +838,33 @@ out_err:
838 return NULL; 838 return NULL;
839} 839}
840EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr); 840EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
841
842void
843pnfs_layout_mark_request_commit(struct nfs_page *req,
844 struct pnfs_layout_segment *lseg,
845 struct nfs_commit_info *cinfo,
846 u32 ds_commit_idx)
847{
848 struct list_head *list;
849 struct pnfs_commit_bucket *buckets;
850
851 spin_lock(cinfo->lock);
852 buckets = cinfo->ds->buckets;
853 list = &buckets[ds_commit_idx].written;
854 if (list_empty(list)) {
855 /* Non-empty buckets hold a reference on the lseg. That ref
856 * is normally transferred to the COMMIT call and released
857 * there. It could also be released if the last req is pulled
858 * off due to a rewrite, in which case it will be done in
859 * pnfs_common_clear_request_commit
860 */
861 WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL);
862 buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg);
863 }
864 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
865 cinfo->ds->nwritten++;
866 spin_unlock(cinfo->lock);
867
868 nfs_request_add_commit_list(req, list, cinfo);
869}
870EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 368d9395d2e7..322b2de02988 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -311,7 +311,6 @@ const struct super_operations nfs_sops = {
311 .destroy_inode = nfs_destroy_inode, 311 .destroy_inode = nfs_destroy_inode,
312 .write_inode = nfs_write_inode, 312 .write_inode = nfs_write_inode,
313 .drop_inode = nfs_drop_inode, 313 .drop_inode = nfs_drop_inode,
314 .put_super = nfs_put_super,
315 .statfs = nfs_statfs, 314 .statfs = nfs_statfs,
316 .evict_inode = nfs_evict_inode, 315 .evict_inode = nfs_evict_inode,
317 .umount_begin = nfs_umount_begin, 316 .umount_begin = nfs_umount_begin,
@@ -2572,7 +2571,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
2572 error = nfs_bdi_register(server); 2571 error = nfs_bdi_register(server);
2573 if (error) { 2572 if (error) {
2574 mntroot = ERR_PTR(error); 2573 mntroot = ERR_PTR(error);
2575 goto error_splat_bdi; 2574 goto error_splat_super;
2576 } 2575 }
2577 server->super = s; 2576 server->super = s;
2578 } 2577 }
@@ -2604,9 +2603,6 @@ error_splat_root:
2604 dput(mntroot); 2603 dput(mntroot);
2605 mntroot = ERR_PTR(error); 2604 mntroot = ERR_PTR(error);
2606error_splat_super: 2605error_splat_super:
2607 if (server && !s->s_root)
2608 bdi_unregister(&server->backing_dev_info);
2609error_splat_bdi:
2610 deactivate_locked_super(s); 2606 deactivate_locked_super(s);
2611 goto out; 2607 goto out;
2612} 2608}
@@ -2654,27 +2650,19 @@ out:
2654EXPORT_SYMBOL_GPL(nfs_fs_mount); 2650EXPORT_SYMBOL_GPL(nfs_fs_mount);
2655 2651
2656/* 2652/*
2657 * Ensure that we unregister the bdi before kill_anon_super
2658 * releases the device name
2659 */
2660void nfs_put_super(struct super_block *s)
2661{
2662 struct nfs_server *server = NFS_SB(s);
2663
2664 bdi_unregister(&server->backing_dev_info);
2665}
2666EXPORT_SYMBOL_GPL(nfs_put_super);
2667
2668/*
2669 * Destroy an NFS2/3 superblock 2653 * Destroy an NFS2/3 superblock
2670 */ 2654 */
2671void nfs_kill_super(struct super_block *s) 2655void nfs_kill_super(struct super_block *s)
2672{ 2656{
2673 struct nfs_server *server = NFS_SB(s); 2657 struct nfs_server *server = NFS_SB(s);
2658 dev_t dev = s->s_dev;
2659
2660 generic_shutdown_super(s);
2674 2661
2675 kill_anon_super(s);
2676 nfs_fscache_release_super_cookie(s); 2662 nfs_fscache_release_super_cookie(s);
2663
2677 nfs_free_server(server); 2664 nfs_free_server(server);
2665 free_anon_bdev(dev);
2678} 2666}
2679EXPORT_SYMBOL_GPL(nfs_kill_super); 2667EXPORT_SYMBOL_GPL(nfs_kill_super);
2680 2668
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ceacfeeb28c2..595d81e354d1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -789,13 +789,8 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
789 nfs_list_add_request(req, dst); 789 nfs_list_add_request(req, dst);
790 cinfo->mds->ncommit++; 790 cinfo->mds->ncommit++;
791 spin_unlock(cinfo->lock); 791 spin_unlock(cinfo->lock);
792 if (!cinfo->dreq) { 792 if (!cinfo->dreq)
793 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 793 nfs_mark_page_unstable(req->wb_page);
794 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
795 BDI_RECLAIMABLE);
796 __mark_inode_dirty(req->wb_context->dentry->d_inode,
797 I_DIRTY_DATASYNC);
798 }
799} 794}
800EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); 795EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
801 796
@@ -858,7 +853,7 @@ static void
858nfs_clear_page_commit(struct page *page) 853nfs_clear_page_commit(struct page *page)
859{ 854{
860 dec_zone_page_state(page, NR_UNSTABLE_NFS); 855 dec_zone_page_state(page, NR_UNSTABLE_NFS);
861 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); 856 dec_bdi_stat(inode_to_bdi(page_file_mapping(page)->host), BDI_RECLAIMABLE);
862} 857}
863 858
864/* Called holding inode (/cinfo) lock */ 859/* Called holding inode (/cinfo) lock */
@@ -1097,6 +1092,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1097{ 1092{
1098 struct nfs_open_context *ctx = nfs_file_open_context(file); 1093 struct nfs_open_context *ctx = nfs_file_open_context(file);
1099 struct nfs_lock_context *l_ctx; 1094 struct nfs_lock_context *l_ctx;
1095 struct file_lock_context *flctx = file_inode(file)->i_flctx;
1100 struct nfs_page *req; 1096 struct nfs_page *req;
1101 int do_flush, status; 1097 int do_flush, status;
1102 /* 1098 /*
@@ -1115,7 +1111,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
1115 do_flush = req->wb_page != page || req->wb_context != ctx; 1111 do_flush = req->wb_page != page || req->wb_context != ctx;
1116 /* for now, flush if more than 1 request in page_group */ 1112 /* for now, flush if more than 1 request in page_group */
1117 do_flush |= req->wb_this_page != req; 1113 do_flush |= req->wb_this_page != req;
1118 if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { 1114 if (l_ctx && flctx &&
1115 !(list_empty_careful(&flctx->flc_posix) &&
1116 list_empty_careful(&flctx->flc_flock))) {
1119 do_flush |= l_ctx->lockowner.l_owner != current->files 1117 do_flush |= l_ctx->lockowner.l_owner != current->files
1120 || l_ctx->lockowner.l_pid != current->tgid; 1118 || l_ctx->lockowner.l_pid != current->tgid;
1121 } 1119 }
@@ -1176,6 +1174,13 @@ out:
1176 return PageUptodate(page) != 0; 1174 return PageUptodate(page) != 0;
1177} 1175}
1178 1176
1177static bool
1178is_whole_file_wrlock(struct file_lock *fl)
1179{
1180 return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
1181 fl->fl_type == F_WRLCK;
1182}
1183
1179/* If we know the page is up to date, and we're not using byte range locks (or 1184/* If we know the page is up to date, and we're not using byte range locks (or
1180 * if we have the whole file locked for writing), it may be more efficient to 1185 * if we have the whole file locked for writing), it may be more efficient to
1181 * extend the write to cover the entire page in order to avoid fragmentation 1186 * extend the write to cover the entire page in order to avoid fragmentation
@@ -1186,17 +1191,36 @@ out:
1186 */ 1191 */
1187static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) 1192static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
1188{ 1193{
1194 int ret;
1195 struct file_lock_context *flctx = inode->i_flctx;
1196 struct file_lock *fl;
1197
1189 if (file->f_flags & O_DSYNC) 1198 if (file->f_flags & O_DSYNC)
1190 return 0; 1199 return 0;
1191 if (!nfs_write_pageuptodate(page, inode)) 1200 if (!nfs_write_pageuptodate(page, inode))
1192 return 0; 1201 return 0;
1193 if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) 1202 if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
1194 return 1; 1203 return 1;
1195 if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && 1204 if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
1196 inode->i_flock->fl_end == OFFSET_MAX && 1205 list_empty_careful(&flctx->flc_posix)))
1197 inode->i_flock->fl_type != F_RDLCK)) 1206 return 0;
1198 return 1; 1207
1199 return 0; 1208 /* Check to see if there are whole file write locks */
1209 ret = 0;
1210 spin_lock(&flctx->flc_lock);
1211 if (!list_empty(&flctx->flc_posix)) {
1212 fl = list_first_entry(&flctx->flc_posix, struct file_lock,
1213 fl_list);
1214 if (is_whole_file_wrlock(fl))
1215 ret = 1;
1216 } else if (!list_empty(&flctx->flc_flock)) {
1217 fl = list_first_entry(&flctx->flc_flock, struct file_lock,
1218 fl_list);
1219 if (fl->fl_type == F_WRLCK)
1220 ret = 1;
1221 }
1222 spin_unlock(&flctx->flc_lock);
1223 return ret;
1200} 1224}
1201 1225
1202/* 1226/*
@@ -1576,11 +1600,8 @@ void nfs_retry_commit(struct list_head *page_list,
1576 req = nfs_list_entry(page_list->next); 1600 req = nfs_list_entry(page_list->next);
1577 nfs_list_remove_request(req); 1601 nfs_list_remove_request(req);
1578 nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); 1602 nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
1579 if (!cinfo->dreq) { 1603 if (!cinfo->dreq)
1580 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1604 nfs_clear_page_commit(req->wb_page);
1581 dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1582 BDI_RECLAIMABLE);
1583 }
1584 nfs_unlock_and_release_request(req); 1605 nfs_unlock_and_release_request(req);
1585 } 1606 }
1586} 1607}
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 73395156bdb4..683bf718aead 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,16 @@ config NFSD_V4
82 82
83 If unsure, say N. 83 If unsure, say N.
84 84
85config NFSD_PNFS
86 bool "NFSv4.1 server support for Parallel NFS (pNFS)"
87 depends on NFSD_V4
88 help
89 This option enables support for the parallel NFS features of the
90 minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
91 server.
92
93 If unsure, say N.
94
85config NFSD_V4_SECURITY_LABEL 95config NFSD_V4_SECURITY_LABEL
86 bool "Provide Security Label support for NFSv4 server" 96 bool "Provide Security Label support for NFSv4 server"
87 depends on NFSD_V4 && SECURITY 97 depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index af32ef06b4fe..9a6028e120c6 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -2,9 +2,14 @@
2# Makefile for the Linux nfs server 2# Makefile for the Linux nfs server
3# 3#
4 4
5ccflags-y += -I$(src) # needed for trace events
6
5obj-$(CONFIG_NFSD) += nfsd.o 7obj-$(CONFIG_NFSD) += nfsd.o
6 8
7nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ 9# this one should be compiled first, as the tracing macros can easily blow up
10nfsd-y += trace.o
11
12nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
8 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o 13 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
9nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o 14nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
10nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o 15nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
@@ -12,3 +17,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 17nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
13nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 18nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
14 nfs4acl.o nfs4callback.o nfs4recover.o 19 nfs4acl.o nfs4callback.o nfs4recover.o
20nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
new file mode 100644
index 000000000000..cdbc78c72542
--- /dev/null
+++ b/fs/nfsd/blocklayout.c
@@ -0,0 +1,189 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/exportfs.h>
5#include <linux/genhd.h>
6#include <linux/slab.h>
7
8#include <linux/nfsd/debug.h>
9
10#include "blocklayoutxdr.h"
11#include "pnfs.h"
12
13#define NFSDDBG_FACILITY NFSDDBG_PNFS
14
15
16static int
17nfsd4_block_get_device_info_simple(struct super_block *sb,
18 struct nfsd4_getdeviceinfo *gdp)
19{
20 struct pnfs_block_deviceaddr *dev;
21 struct pnfs_block_volume *b;
22
23 dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
24 sizeof(struct pnfs_block_volume), GFP_KERNEL);
25 if (!dev)
26 return -ENOMEM;
27 gdp->gd_device = dev;
28
29 dev->nr_volumes = 1;
30 b = &dev->volumes[0];
31
32 b->type = PNFS_BLOCK_VOLUME_SIMPLE;
33 b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
34 return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
35 &b->simple.offset);
36}
37
38static __be32
39nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
40 struct nfsd4_getdeviceinfo *gdp)
41{
42 if (sb->s_bdev != sb->s_bdev->bd_contains)
43 return nfserr_inval;
44 return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
45}
46
47static __be32
48nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
49 struct nfsd4_layoutget *args)
50{
51 struct nfsd4_layout_seg *seg = &args->lg_seg;
52 struct super_block *sb = inode->i_sb;
53 u32 block_size = (1 << inode->i_blkbits);
54 struct pnfs_block_extent *bex;
55 struct iomap iomap;
56 u32 device_generation = 0;
57 int error;
58
59 /*
60 * We do not attempt to support I/O smaller than the fs block size,
61 * or not aligned to it.
62 */
63 if (args->lg_minlength < block_size) {
64 dprintk("pnfsd: I/O too small\n");
65 goto out_layoutunavailable;
66 }
67 if (seg->offset & (block_size - 1)) {
68 dprintk("pnfsd: I/O misaligned\n");
69 goto out_layoutunavailable;
70 }
71
72 /*
73 * Some clients barf on non-zero block numbers for NONE or INVALID
74 * layouts, so make sure to zero the whole structure.
75 */
76 error = -ENOMEM;
77 bex = kzalloc(sizeof(*bex), GFP_KERNEL);
78 if (!bex)
79 goto out_error;
80 args->lg_content = bex;
81
82 error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
83 &iomap, seg->iomode != IOMODE_READ,
84 &device_generation);
85 if (error) {
86 if (error == -ENXIO)
87 goto out_layoutunavailable;
88 goto out_error;
89 }
90
91 if (iomap.length < args->lg_minlength) {
92 dprintk("pnfsd: extent smaller than minlength\n");
93 goto out_layoutunavailable;
94 }
95
96 switch (iomap.type) {
97 case IOMAP_MAPPED:
98 if (seg->iomode == IOMODE_READ)
99 bex->es = PNFS_BLOCK_READ_DATA;
100 else
101 bex->es = PNFS_BLOCK_READWRITE_DATA;
102 bex->soff = (iomap.blkno << 9);
103 break;
104 case IOMAP_UNWRITTEN:
105 if (seg->iomode & IOMODE_RW) {
106 /*
107 * Crack monkey special case from section 2.3.1.
108 */
109 if (args->lg_minlength == 0) {
110 dprintk("pnfsd: no soup for you!\n");
111 goto out_layoutunavailable;
112 }
113
114 bex->es = PNFS_BLOCK_INVALID_DATA;
115 bex->soff = (iomap.blkno << 9);
116 break;
117 }
118 /*FALLTHRU*/
119 case IOMAP_HOLE:
120 if (seg->iomode == IOMODE_READ) {
121 bex->es = PNFS_BLOCK_NONE_DATA;
122 break;
123 }
124 /*FALLTHRU*/
125 case IOMAP_DELALLOC:
126 default:
127 WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
128 goto out_layoutunavailable;
129 }
130
131 error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
132 if (error)
133 goto out_error;
134 bex->foff = iomap.offset;
135 bex->len = iomap.length;
136
137 seg->offset = iomap.offset;
138 seg->length = iomap.length;
139
140 dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es);
141 return 0;
142
143out_error:
144 seg->length = 0;
145 return nfserrno(error);
146out_layoutunavailable:
147 seg->length = 0;
148 return nfserr_layoutunavailable;
149}
150
151static __be32
152nfsd4_block_proc_layoutcommit(struct inode *inode,
153 struct nfsd4_layoutcommit *lcp)
154{
155 loff_t new_size = lcp->lc_last_wr + 1;
156 struct iattr iattr = { .ia_valid = 0 };
157 struct iomap *iomaps;
158 int nr_iomaps;
159 int error;
160
161 nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
162 lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
163 if (nr_iomaps < 0)
164 return nfserrno(nr_iomaps);
165
166 if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
167 timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
168 lcp->lc_mtime = current_fs_time(inode->i_sb);
169 iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
170 iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
171
172 if (new_size > i_size_read(inode)) {
173 iattr.ia_valid |= ATTR_SIZE;
174 iattr.ia_size = new_size;
175 }
176
177 error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
178 nr_iomaps, &iattr);
179 kfree(iomaps);
180 return nfserrno(error);
181}
182
183const struct nfsd4_layout_ops bl_layout_ops = {
184 .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
185 .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
186 .proc_layoutget = nfsd4_block_proc_layoutget,
187 .encode_layoutget = nfsd4_block_encode_layoutget,
188 .proc_layoutcommit = nfsd4_block_proc_layoutcommit,
189};
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
new file mode 100644
index 000000000000..9da89fddab33
--- /dev/null
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/sunrpc/svc.h>
5#include <linux/exportfs.h>
6#include <linux/nfs4.h>
7
8#include "nfsd.h"
9#include "blocklayoutxdr.h"
10
11#define NFSDDBG_FACILITY NFSDDBG_PNFS
12
13
14__be32
15nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
16 struct nfsd4_layoutget *lgp)
17{
18 struct pnfs_block_extent *b = lgp->lg_content;
19 int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32);
20 __be32 *p;
21
22 p = xdr_reserve_space(xdr, sizeof(__be32) + len);
23 if (!p)
24 return nfserr_toosmall;
25
26 *p++ = cpu_to_be32(len);
27 *p++ = cpu_to_be32(1); /* we always return a single extent */
28
29 p = xdr_encode_opaque_fixed(p, &b->vol_id,
30 sizeof(struct nfsd4_deviceid));
31 p = xdr_encode_hyper(p, b->foff);
32 p = xdr_encode_hyper(p, b->len);
33 p = xdr_encode_hyper(p, b->soff);
34 *p++ = cpu_to_be32(b->es);
35 return 0;
36}
37
38static int
39nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
40{
41 __be32 *p;
42 int len;
43
44 switch (b->type) {
45 case PNFS_BLOCK_VOLUME_SIMPLE:
46 len = 4 + 4 + 8 + 4 + b->simple.sig_len;
47 p = xdr_reserve_space(xdr, len);
48 if (!p)
49 return -ETOOSMALL;
50
51 *p++ = cpu_to_be32(b->type);
52 *p++ = cpu_to_be32(1); /* single signature */
53 p = xdr_encode_hyper(p, b->simple.offset);
54 p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
55 break;
56 default:
57 return -ENOTSUPP;
58 }
59
60 return len;
61}
62
63__be32
64nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
65 struct nfsd4_getdeviceinfo *gdp)
66{
67 struct pnfs_block_deviceaddr *dev = gdp->gd_device;
68 int len = sizeof(__be32), ret, i;
69 __be32 *p;
70
71 p = xdr_reserve_space(xdr, len + sizeof(__be32));
72 if (!p)
73 return nfserr_resource;
74
75 for (i = 0; i < dev->nr_volumes; i++) {
76 ret = nfsd4_block_encode_volume(xdr, &dev->volumes[i]);
77 if (ret < 0)
78 return nfserrno(ret);
79 len += ret;
80 }
81
82 /*
83 * Fill in the overall length and number of volumes at the beginning
84 * of the layout.
85 */
86 *p++ = cpu_to_be32(len);
87 *p++ = cpu_to_be32(dev->nr_volumes);
88 return 0;
89}
90
91int
92nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
93 u32 block_size)
94{
95 struct iomap *iomaps;
96 u32 nr_iomaps, expected, i;
97
98 if (len < sizeof(u32)) {
99 dprintk("%s: extent array too small: %u\n", __func__, len);
100 return -EINVAL;
101 }
102
103 nr_iomaps = be32_to_cpup(p++);
104 expected = sizeof(__be32) + nr_iomaps * NFS4_BLOCK_EXTENT_SIZE;
105 if (len != expected) {
106 dprintk("%s: extent array size mismatch: %u/%u\n",
107 __func__, len, expected);
108 return -EINVAL;
109 }
110
111 iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
112 if (!iomaps) {
113 dprintk("%s: failed to allocate extent array\n", __func__);
114 return -ENOMEM;
115 }
116
117 for (i = 0; i < nr_iomaps; i++) {
118 struct pnfs_block_extent bex;
119
120 memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
121 p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
122
123 p = xdr_decode_hyper(p, &bex.foff);
124 if (bex.foff & (block_size - 1)) {
125 dprintk("%s: unaligned offset %lld\n",
126 __func__, bex.foff);
127 goto fail;
128 }
129 p = xdr_decode_hyper(p, &bex.len);
130 if (bex.len & (block_size - 1)) {
131 dprintk("%s: unaligned length %lld\n",
132 __func__, bex.foff);
133 goto fail;
134 }
135 p = xdr_decode_hyper(p, &bex.soff);
136 if (bex.soff & (block_size - 1)) {
137 dprintk("%s: unaligned disk offset %lld\n",
138 __func__, bex.soff);
139 goto fail;
140 }
141 bex.es = be32_to_cpup(p++);
142 if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
143 dprintk("%s: incorrect extent state %d\n",
144 __func__, bex.es);
145 goto fail;
146 }
147
148 iomaps[i].offset = bex.foff;
149 iomaps[i].length = bex.len;
150 }
151
152 *iomapp = iomaps;
153 return nr_iomaps;
154fail:
155 kfree(iomaps);
156 return -EINVAL;
157}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
new file mode 100644
index 000000000000..fdc79037c0e7
--- /dev/null
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -0,0 +1,62 @@
1#ifndef _NFSD_BLOCKLAYOUTXDR_H
2#define _NFSD_BLOCKLAYOUTXDR_H 1
3
4#include <linux/blkdev.h>
5#include "xdr4.h"
6
7struct iomap;
8struct xdr_stream;
9
/*
 * State of a block layout extent as it is put on the wire.
 *
 * As used by nfsd4_block_proc_layoutget(): READWRITE_DATA / READ_DATA are
 * handed out for mapped extents (depending on the requested iomode),
 * INVALID_DATA for unwritten extents requested for writing, and NONE_DATA
 * for holes requested for reading.  The layout update decoder accepts
 * READWRITE_DATA only.
 */
10enum pnfs_block_extent_state {
11 PNFS_BLOCK_READWRITE_DATA = 0,
12 PNFS_BLOCK_READ_DATA = 1,
13 PNFS_BLOCK_INVALID_DATA = 2,
14 PNFS_BLOCK_NONE_DATA = 3,
15};
16
/*
 * One extent of a block layout, in host byte order.
 *
 * vol_id: device id of the volume the extent lives on
 * foff:   offset of the extent within the file
 * len:    length of the extent
 * soff:   offset on the volume (layoutget fills in iomap.blkno << 9)
 * es:     extent state, see enum pnfs_block_extent_state
 */
17struct pnfs_block_extent {
18 struct nfsd4_deviceid vol_id;
19 u64 foff;
20 u64 len;
21 u64 soff;
22 enum pnfs_block_extent_state es;
23};
24#define NFS4_BLOCK_EXTENT_SIZE 44
25
/*
 * Volume types of a block device address.  Only PNFS_BLOCK_VOLUME_SIMPLE is
 * produced and encoded by the code in blocklayout.c / blocklayoutxdr.c; the
 * volume encoder rejects every other type with -ENOTSUPP.
 */
26enum pnfs_block_volume_type {
27 PNFS_BLOCK_VOLUME_SIMPLE = 0,
28 PNFS_BLOCK_VOLUME_SLICE = 1,
29 PNFS_BLOCK_VOLUME_CONCAT = 2,
30 PNFS_BLOCK_VOLUME_STRIPE = 3,
31};
32
33/*
34 * Random upper cap for the uuid length to avoid unbounded allocation.
35 * Not actually limited by the protocol.
36 */
37#define PNFS_BLOCK_UUID_LEN 128
38
/*
 * One volume of a block device address.
 *
 * For a simple volume the filesystem's ->get_uuid() export operation is
 * given sig, a pointer to sig_len (pre-set to PNFS_BLOCK_UUID_LEN) and a
 * pointer to offset; the encoder then sends offset followed by sig_len
 * bytes of sig.
 */
39struct pnfs_block_volume {
40 enum pnfs_block_volume_type type;
41 union {
42 struct {
43 u64 offset;
44 u32 sig_len;
45 u8 sig[PNFS_BLOCK_UUID_LEN];
46 } simple;
47 };
48};
49
/*
 * Block device address returned by GETDEVICEINFO: a volume count followed
 * by that many volumes in a flexible array, so allocations must add
 * nr_volumes * sizeof(struct pnfs_block_volume) to the struct size.
 */
50struct pnfs_block_deviceaddr {
51 u32 nr_volumes;
52 struct pnfs_block_volume volumes[];
53};
54
55__be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
56 struct nfsd4_getdeviceinfo *gdp);
57__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
58 struct nfsd4_layoutget *lgp);
59int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
60 u32 block_size);
61
62#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 30a739d896ff..c3e3b6e55ae2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -20,6 +20,7 @@
20#include "nfsd.h" 20#include "nfsd.h"
21#include "nfsfh.h" 21#include "nfsfh.h"
22#include "netns.h" 22#include "netns.h"
23#include "pnfs.h"
23 24
24#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
25 26
@@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
545 546
546 exp.ex_client = dom; 547 exp.ex_client = dom;
547 exp.cd = cd; 548 exp.cd = cd;
549 exp.ex_devid_map = NULL;
548 550
549 /* expiry */ 551 /* expiry */
550 err = -EINVAL; 552 err = -EINVAL;
@@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
621 if (!gid_valid(exp.ex_anon_gid)) 623 if (!gid_valid(exp.ex_anon_gid))
622 goto out4; 624 goto out4;
623 err = 0; 625 err = 0;
626
627 nfsd4_setup_layout_type(&exp);
624 } 628 }
625 629
626 expp = svc_export_lookup(&exp); 630 expp = svc_export_lookup(&exp);
@@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
703 new->ex_fslocs.locations = NULL; 707 new->ex_fslocs.locations = NULL;
704 new->ex_fslocs.locations_count = 0; 708 new->ex_fslocs.locations_count = 0;
705 new->ex_fslocs.migrated = 0; 709 new->ex_fslocs.migrated = 0;
710 new->ex_layout_type = 0;
706 new->ex_uuid = NULL; 711 new->ex_uuid = NULL;
707 new->cd = item->cd; 712 new->cd = item->cd;
708} 713}
@@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
717 new->ex_anon_uid = item->ex_anon_uid; 722 new->ex_anon_uid = item->ex_anon_uid;
718 new->ex_anon_gid = item->ex_anon_gid; 723 new->ex_anon_gid = item->ex_anon_gid;
719 new->ex_fsid = item->ex_fsid; 724 new->ex_fsid = item->ex_fsid;
725 new->ex_devid_map = item->ex_devid_map;
726 item->ex_devid_map = NULL;
720 new->ex_uuid = item->ex_uuid; 727 new->ex_uuid = item->ex_uuid;
721 item->ex_uuid = NULL; 728 item->ex_uuid = NULL;
722 new->ex_fslocs.locations = item->ex_fslocs.locations; 729 new->ex_fslocs.locations = item->ex_fslocs.locations;
@@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
725 item->ex_fslocs.locations_count = 0; 732 item->ex_fslocs.locations_count = 0;
726 new->ex_fslocs.migrated = item->ex_fslocs.migrated; 733 new->ex_fslocs.migrated = item->ex_fslocs.migrated;
727 item->ex_fslocs.migrated = 0; 734 item->ex_fslocs.migrated = 0;
735 new->ex_layout_type = item->ex_layout_type;
728 new->ex_nflavors = item->ex_nflavors; 736 new->ex_nflavors = item->ex_nflavors;
729 for (i = 0; i < MAX_SECINFO_LIST; i++) { 737 for (i = 0; i < MAX_SECINFO_LIST; i++) {
730 new->ex_flavors[i] = item->ex_flavors[i]; 738 new->ex_flavors[i] = item->ex_flavors[i];
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 04dc8c167b0c..1f52bfcc436f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -56,6 +56,8 @@ struct svc_export {
56 struct nfsd4_fs_locations ex_fslocs; 56 struct nfsd4_fs_locations ex_fslocs;
57 uint32_t ex_nflavors; 57 uint32_t ex_nflavors;
58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; 58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
59 enum pnfs_layouttype ex_layout_type;
60 struct nfsd4_deviceid_map *ex_devid_map;
59 struct cache_detail *cd; 61 struct cache_detail *cd;
60}; 62};
61 63
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7cbdf1b2e4ab..58277859a467 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -546,6 +546,102 @@ out:
546 return status; 546 return status;
547} 547}
548 548
549#ifdef CONFIG_NFSD_PNFS
550/*
551 * CB_LAYOUTRECALL4args
552 *
553 * struct layoutrecall_file4 {
554 * nfs_fh4 lor_fh;
555 * offset4 lor_offset;
556 * length4 lor_length;
557 * stateid4 lor_stateid;
558 * };
559 *
560 * union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) {
561 * case LAYOUTRECALL4_FILE:
562 * layoutrecall_file4 lor_layout;
563 * case LAYOUTRECALL4_FSID:
564 * fsid4 lor_fsid;
565 * case LAYOUTRECALL4_ALL:
566 * void;
567 * };
568 *
569 * struct CB_LAYOUTRECALL4args {
570 * layouttype4 clora_type;
571 * layoutiomode4 clora_iomode;
572 * bool clora_changed;
573 * layoutrecall4 clora_recall;
574 * };
575 */
576static void encode_cb_layout4args(struct xdr_stream *xdr,
577 const struct nfs4_layout_stateid *ls,
578 struct nfs4_cb_compound_hdr *hdr)
579{
580 __be32 *p;
581
582 BUG_ON(hdr->minorversion == 0);
583
584 p = xdr_reserve_space(xdr, 5 * 4);
585 *p++ = cpu_to_be32(OP_CB_LAYOUTRECALL);
586 *p++ = cpu_to_be32(ls->ls_layout_type);
587 *p++ = cpu_to_be32(IOMODE_ANY);
588 *p++ = cpu_to_be32(1);
589 *p = cpu_to_be32(RETURN_FILE);
590
591 encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle);
592
593 p = xdr_reserve_space(xdr, 2 * 8);
594 p = xdr_encode_hyper(p, 0);
595 xdr_encode_hyper(p, NFS4_MAX_UINT64);
596
597 encode_stateid4(xdr, &ls->ls_recall_sid);
598
599 hdr->nops++;
600}
601
602static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
603 struct xdr_stream *xdr,
604 const struct nfsd4_callback *cb)
605{
606 const struct nfs4_layout_stateid *ls =
607 container_of(cb, struct nfs4_layout_stateid, ls_recall);
608 struct nfs4_cb_compound_hdr hdr = {
609 .ident = 0,
610 .minorversion = cb->cb_minorversion,
611 };
612
613 encode_cb_compound4args(xdr, &hdr);
614 encode_cb_sequence4args(xdr, cb, &hdr);
615 encode_cb_layout4args(xdr, ls, &hdr);
616 encode_cb_nops(&hdr);
617}
618
619static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
620 struct xdr_stream *xdr,
621 struct nfsd4_callback *cb)
622{
623 struct nfs4_cb_compound_hdr hdr;
624 enum nfsstat4 nfserr;
625 int status;
626
627 status = decode_cb_compound4res(xdr, &hdr);
628 if (unlikely(status))
629 goto out;
630 if (cb) {
631 status = decode_cb_sequence4res(xdr, cb);
632 if (unlikely(status))
633 goto out;
634 }
635 status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr);
636 if (unlikely(status))
637 goto out;
638 if (unlikely(nfserr != NFS4_OK))
639 status = nfs_cb_stat_to_errno(nfserr);
640out:
641 return status;
642}
643#endif /* CONFIG_NFSD_PNFS */
644
549/* 645/*
550 * RPC procedure tables 646 * RPC procedure tables
551 */ 647 */
@@ -563,6 +659,9 @@ out:
563static struct rpc_procinfo nfs4_cb_procedures[] = { 659static struct rpc_procinfo nfs4_cb_procedures[] = {
564 PROC(CB_NULL, NULL, cb_null, cb_null), 660 PROC(CB_NULL, NULL, cb_null, cb_null),
565 PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), 661 PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall),
662#ifdef CONFIG_NFSD_PNFS
663 PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
664#endif
566}; 665};
567 666
568static struct rpc_version nfs_cb_version4 = { 667static struct rpc_version nfs_cb_version4 = {
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
new file mode 100644
index 000000000000..3c1bfa155571
--- /dev/null
+++ b/fs/nfsd/nfs4layouts.c
@@ -0,0 +1,721 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/kmod.h>
5#include <linux/file.h>
6#include <linux/jhash.h>
7#include <linux/sched.h>
8#include <linux/sunrpc/addr.h>
9
10#include "pnfs.h"
11#include "netns.h"
12#include "trace.h"
13
14#define NFSDDBG_FACILITY NFSDDBG_PNFS
15
/*
 * One layout segment handed out to a client.
 *
 * lo_perstate: entry in the owning stateid's ls_layouts list (or, while
 *              being torn down, in a local reap list)
 * lo_state:    layout stateid this segment belongs to; nfsd4_insert_layout()
 *              bumps the stateid's sc_count when it links a new segment and
 *              nfsd4_free_layouts() drops it again
 * lo_seg:      the range and iomode covered
 */
16struct nfs4_layout {
17 struct list_head lo_perstate;
18 struct nfs4_layout_stateid *lo_state;
19 struct nfsd4_layout_seg lo_seg;
20};
21
22static struct kmem_cache *nfs4_layout_cache;
23static struct kmem_cache *nfs4_layout_stateid_cache;
24
25static struct nfsd4_callback_ops nfsd4_cb_layout_ops;
26static const struct lock_manager_operations nfsd4_layouts_lm_ops;
27
28const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
29 [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
30};
31
32/* pNFS device ID to export fsid mapping */
33#define DEVID_HASH_BITS 8
34#define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS)
35#define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1)
36static u64 nfsd_devid_seq = 1;
37static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE];
38static DEFINE_SPINLOCK(nfsd_devid_lock);
39
40static inline u32 devid_hashfn(u64 idx)
41{
42 return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK;
43}
44
45static void
46nfsd4_alloc_devid_map(const struct svc_fh *fhp)
47{
48 const struct knfsd_fh *fh = &fhp->fh_handle;
49 size_t fsid_len = key_len(fh->fh_fsid_type);
50 struct nfsd4_deviceid_map *map, *old;
51 int i;
52
53 map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL);
54 if (!map)
55 return;
56
57 map->fsid_type = fh->fh_fsid_type;
58 memcpy(&map->fsid, fh->fh_fsid, fsid_len);
59
60 spin_lock(&nfsd_devid_lock);
61 if (fhp->fh_export->ex_devid_map)
62 goto out_unlock;
63
64 for (i = 0; i < DEVID_HASH_SIZE; i++) {
65 list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
66 if (old->fsid_type != fh->fh_fsid_type)
67 continue;
68 if (memcmp(old->fsid, fh->fh_fsid,
69 key_len(old->fsid_type)))
70 continue;
71
72 fhp->fh_export->ex_devid_map = old;
73 goto out_unlock;
74 }
75 }
76
77 map->idx = nfsd_devid_seq++;
78 list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]);
79 fhp->fh_export->ex_devid_map = map;
80 map = NULL;
81
82out_unlock:
83 spin_unlock(&nfsd_devid_lock);
84 kfree(map);
85}
86
87struct nfsd4_deviceid_map *
88nfsd4_find_devid_map(int idx)
89{
90 struct nfsd4_deviceid_map *map, *ret = NULL;
91
92 rcu_read_lock();
93 list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash)
94 if (map->idx == idx)
95 ret = map;
96 rcu_read_unlock();
97
98 return ret;
99}
100
101int
102nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
103 u32 device_generation)
104{
105 if (!fhp->fh_export->ex_devid_map) {
106 nfsd4_alloc_devid_map(fhp);
107 if (!fhp->fh_export->ex_devid_map)
108 return -ENOMEM;
109 }
110
111 id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
112 id->generation = device_generation;
113 id->pad = 0;
114 return 0;
115}
116
117void nfsd4_setup_layout_type(struct svc_export *exp)
118{
119 struct super_block *sb = exp->ex_path.mnt->mnt_sb;
120
121 if (exp->ex_flags & NFSEXP_NOPNFS)
122 return;
123
124 if (sb->s_export_op->get_uuid &&
125 sb->s_export_op->map_blocks &&
126 sb->s_export_op->commit_blocks)
127 exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
128}
129
130static void
131nfsd4_free_layout_stateid(struct nfs4_stid *stid)
132{
133 struct nfs4_layout_stateid *ls = layoutstateid(stid);
134 struct nfs4_client *clp = ls->ls_stid.sc_client;
135 struct nfs4_file *fp = ls->ls_stid.sc_file;
136
137 trace_layoutstate_free(&ls->ls_stid.sc_stateid);
138
139 spin_lock(&clp->cl_lock);
140 list_del_init(&ls->ls_perclnt);
141 spin_unlock(&clp->cl_lock);
142
143 spin_lock(&fp->fi_lock);
144 list_del_init(&ls->ls_perfile);
145 spin_unlock(&fp->fi_lock);
146
147 vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
148 fput(ls->ls_file);
149
150 if (ls->ls_recalled)
151 atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
152
153 kmem_cache_free(nfs4_layout_stateid_cache, ls);
154}
155
156static int
157nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
158{
159 struct file_lock *fl;
160 int status;
161
162 fl = locks_alloc_lock();
163 if (!fl)
164 return -ENOMEM;
165 locks_init_lock(fl);
166 fl->fl_lmops = &nfsd4_layouts_lm_ops;
167 fl->fl_flags = FL_LAYOUT;
168 fl->fl_type = F_RDLCK;
169 fl->fl_end = OFFSET_MAX;
170 fl->fl_owner = ls;
171 fl->fl_pid = current->tgid;
172 fl->fl_file = ls->ls_file;
173
174 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
175 if (status) {
176 locks_free_lock(fl);
177 return status;
178 }
179 BUG_ON(fl != NULL);
180 return 0;
181}
182
183static struct nfs4_layout_stateid *
184nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
185 struct nfs4_stid *parent, u32 layout_type)
186{
187 struct nfs4_client *clp = cstate->clp;
188 struct nfs4_file *fp = parent->sc_file;
189 struct nfs4_layout_stateid *ls;
190 struct nfs4_stid *stp;
191
192 stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
193 if (!stp)
194 return NULL;
195 stp->sc_free = nfsd4_free_layout_stateid;
196 get_nfs4_file(fp);
197 stp->sc_file = fp;
198
199 ls = layoutstateid(stp);
200 INIT_LIST_HEAD(&ls->ls_perclnt);
201 INIT_LIST_HEAD(&ls->ls_perfile);
202 spin_lock_init(&ls->ls_lock);
203 INIT_LIST_HEAD(&ls->ls_layouts);
204 ls->ls_layout_type = layout_type;
205 nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
206 NFSPROC4_CLNT_CB_LAYOUT);
207
208 if (parent->sc_type == NFS4_DELEG_STID)
209 ls->ls_file = get_file(fp->fi_deleg_file);
210 else
211 ls->ls_file = find_any_file(fp);
212 BUG_ON(!ls->ls_file);
213
214 if (nfsd4_layout_setlease(ls)) {
215 put_nfs4_file(fp);
216 kmem_cache_free(nfs4_layout_stateid_cache, ls);
217 return NULL;
218 }
219
220 spin_lock(&clp->cl_lock);
221 stp->sc_type = NFS4_LAYOUT_STID;
222 list_add(&ls->ls_perclnt, &clp->cl_lo_states);
223 spin_unlock(&clp->cl_lock);
224
225 spin_lock(&fp->fi_lock);
226 list_add(&ls->ls_perfile, &fp->fi_lo_states);
227 spin_unlock(&fp->fi_lock);
228
229 trace_layoutstate_alloc(&ls->ls_stid.sc_stateid);
230 return ls;
231}
232
233__be32
234nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
235 struct nfsd4_compound_state *cstate, stateid_t *stateid,
236 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
237{
238 struct nfs4_layout_stateid *ls;
239 struct nfs4_stid *stid;
240 unsigned char typemask = NFS4_LAYOUT_STID;
241 __be32 status;
242
243 if (create)
244 typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
245
246 status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
247 net_generic(SVC_NET(rqstp), nfsd_net_id));
248 if (status)
249 goto out;
250
251 if (!fh_match(&cstate->current_fh.fh_handle,
252 &stid->sc_file->fi_fhandle)) {
253 status = nfserr_bad_stateid;
254 goto out_put_stid;
255 }
256
257 if (stid->sc_type != NFS4_LAYOUT_STID) {
258 ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
259 nfs4_put_stid(stid);
260
261 status = nfserr_jukebox;
262 if (!ls)
263 goto out;
264 } else {
265 ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
266
267 status = nfserr_bad_stateid;
268 if (stateid->si_generation > stid->sc_stateid.si_generation)
269 goto out_put_stid;
270 if (layout_type != ls->ls_layout_type)
271 goto out_put_stid;
272 }
273
274 *lsp = ls;
275 return 0;
276
277out_put_stid:
278 nfs4_put_stid(stid);
279out:
280 return status;
281}
282
283static void
284nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
285{
286 spin_lock(&ls->ls_lock);
287 if (ls->ls_recalled)
288 goto out_unlock;
289
290 ls->ls_recalled = true;
291 atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
292 if (list_empty(&ls->ls_layouts))
293 goto out_unlock;
294
295 trace_layout_recall(&ls->ls_stid.sc_stateid);
296
297 atomic_inc(&ls->ls_stid.sc_count);
298 update_stateid(&ls->ls_stid.sc_stateid);
299 memcpy(&ls->ls_recall_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
300 nfsd4_run_cb(&ls->ls_recall);
301
302out_unlock:
303 spin_unlock(&ls->ls_lock);
304}
305
306static inline u64
307layout_end(struct nfsd4_layout_seg *seg)
308{
309 u64 end = seg->offset + seg->length;
310 return end >= seg->offset ? end : NFS4_MAX_UINT64;
311}
312
313static void
314layout_update_len(struct nfsd4_layout_seg *lo, u64 end)
315{
316 if (end == NFS4_MAX_UINT64)
317 lo->length = NFS4_MAX_UINT64;
318 else
319 lo->length = end - lo->offset;
320}
321
322static bool
323layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s)
324{
325 if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode)
326 return false;
327 if (layout_end(&lo->lo_seg) <= s->offset)
328 return false;
329 if (layout_end(s) <= lo->lo_seg.offset)
330 return false;
331 return true;
332}
333
334static bool
335layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new)
336{
337 if (lo->iomode != new->iomode)
338 return false;
339 if (layout_end(new) < lo->offset)
340 return false;
341 if (layout_end(lo) < new->offset)
342 return false;
343
344 lo->offset = min(lo->offset, new->offset);
345 layout_update_len(lo, max(layout_end(lo), layout_end(new)));
346 return true;
347}
348
349static __be32
350nfsd4_recall_conflict(struct nfs4_layout_stateid *ls)
351{
352 struct nfs4_file *fp = ls->ls_stid.sc_file;
353 struct nfs4_layout_stateid *l, *n;
354 __be32 nfserr = nfs_ok;
355
356 assert_spin_locked(&fp->fi_lock);
357
358 list_for_each_entry_safe(l, n, &fp->fi_lo_states, ls_perfile) {
359 if (l != ls) {
360 nfsd4_recall_file_layout(l);
361 nfserr = nfserr_recallconflict;
362 }
363 }
364
365 return nfserr;
366}
367
368__be32
369nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
370{
371 struct nfsd4_layout_seg *seg = &lgp->lg_seg;
372 struct nfs4_file *fp = ls->ls_stid.sc_file;
373 struct nfs4_layout *lp, *new = NULL;
374 __be32 nfserr;
375
376 spin_lock(&fp->fi_lock);
377 nfserr = nfsd4_recall_conflict(ls);
378 if (nfserr)
379 goto out;
380 spin_lock(&ls->ls_lock);
381 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
382 if (layouts_try_merge(&lp->lo_seg, seg))
383 goto done;
384 }
385 spin_unlock(&ls->ls_lock);
386 spin_unlock(&fp->fi_lock);
387
388 new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
389 if (!new)
390 return nfserr_jukebox;
391 memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
392 new->lo_state = ls;
393
394 spin_lock(&fp->fi_lock);
395 nfserr = nfsd4_recall_conflict(ls);
396 if (nfserr)
397 goto out;
398 spin_lock(&ls->ls_lock);
399 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
400 if (layouts_try_merge(&lp->lo_seg, seg))
401 goto done;
402 }
403
404 atomic_inc(&ls->ls_stid.sc_count);
405 list_add_tail(&new->lo_perstate, &ls->ls_layouts);
406 new = NULL;
407done:
408 update_stateid(&ls->ls_stid.sc_stateid);
409 memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
410 spin_unlock(&ls->ls_lock);
411out:
412 spin_unlock(&fp->fi_lock);
413 if (new)
414 kmem_cache_free(nfs4_layout_cache, new);
415 return nfserr;
416}
417
418static void
419nfsd4_free_layouts(struct list_head *reaplist)
420{
421 while (!list_empty(reaplist)) {
422 struct nfs4_layout *lp = list_first_entry(reaplist,
423 struct nfs4_layout, lo_perstate);
424
425 list_del(&lp->lo_perstate);
426 nfs4_put_stid(&lp->lo_state->ls_stid);
427 kmem_cache_free(nfs4_layout_cache, lp);
428 }
429}
430
431static void
432nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
433 struct list_head *reaplist)
434{
435 struct nfsd4_layout_seg *lo = &lp->lo_seg;
436 u64 end = layout_end(lo);
437
438 if (seg->offset <= lo->offset) {
439 if (layout_end(seg) >= end) {
440 list_move_tail(&lp->lo_perstate, reaplist);
441 return;
442 }
443 end = seg->offset;
444 } else {
445 /* retain the whole layout segment on a split. */
446 if (layout_end(seg) < end) {
447 dprintk("%s: split not supported\n", __func__);
448 return;
449 }
450
451 lo->offset = layout_end(seg);
452 }
453
454 layout_update_len(lo, end);
455}
456
457__be32
458nfsd4_return_file_layouts(struct svc_rqst *rqstp,
459 struct nfsd4_compound_state *cstate,
460 struct nfsd4_layoutreturn *lrp)
461{
462 struct nfs4_layout_stateid *ls;
463 struct nfs4_layout *lp, *n;
464 LIST_HEAD(reaplist);
465 __be32 nfserr;
466 int found = 0;
467
468 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
469 false, lrp->lr_layout_type,
470 &ls);
471 if (nfserr) {
472 trace_layout_return_lookup_fail(&lrp->lr_sid);
473 return nfserr;
474 }
475
476 spin_lock(&ls->ls_lock);
477 list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {
478 if (layouts_overlapping(lp, &lrp->lr_seg)) {
479 nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
480 found++;
481 }
482 }
483 if (!list_empty(&ls->ls_layouts)) {
484 if (found) {
485 update_stateid(&ls->ls_stid.sc_stateid);
486 memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
487 sizeof(stateid_t));
488 }
489 lrp->lrs_present = 1;
490 } else {
491 trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
492 nfs4_unhash_stid(&ls->ls_stid);
493 lrp->lrs_present = 0;
494 }
495 spin_unlock(&ls->ls_lock);
496
497 nfs4_put_stid(&ls->ls_stid);
498 nfsd4_free_layouts(&reaplist);
499 return nfs_ok;
500}
501
502__be32
503nfsd4_return_client_layouts(struct svc_rqst *rqstp,
504 struct nfsd4_compound_state *cstate,
505 struct nfsd4_layoutreturn *lrp)
506{
507 struct nfs4_layout_stateid *ls, *n;
508 struct nfs4_client *clp = cstate->clp;
509 struct nfs4_layout *lp, *t;
510 LIST_HEAD(reaplist);
511
512 lrp->lrs_present = 0;
513
514 spin_lock(&clp->cl_lock);
515 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
516 if (lrp->lr_return_type == RETURN_FSID &&
517 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
518 &cstate->current_fh.fh_handle))
519 continue;
520
521 spin_lock(&ls->ls_lock);
522 list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
523 if (lrp->lr_seg.iomode == IOMODE_ANY ||
524 lrp->lr_seg.iomode == lp->lo_seg.iomode)
525 list_move_tail(&lp->lo_perstate, &reaplist);
526 }
527 spin_unlock(&ls->ls_lock);
528 }
529 spin_unlock(&clp->cl_lock);
530
531 nfsd4_free_layouts(&reaplist);
532 return 0;
533}
534
535static void
536nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
537 struct list_head *reaplist)
538{
539 spin_lock(&ls->ls_lock);
540 list_splice_init(&ls->ls_layouts, reaplist);
541 spin_unlock(&ls->ls_lock);
542}
543
544void
545nfsd4_return_all_client_layouts(struct nfs4_client *clp)
546{
547 struct nfs4_layout_stateid *ls, *n;
548 LIST_HEAD(reaplist);
549
550 spin_lock(&clp->cl_lock);
551 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt)
552 nfsd4_return_all_layouts(ls, &reaplist);
553 spin_unlock(&clp->cl_lock);
554
555 nfsd4_free_layouts(&reaplist);
556}
557
558void
559nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
560{
561 struct nfs4_layout_stateid *ls, *n;
562 LIST_HEAD(reaplist);
563
564 spin_lock(&fp->fi_lock);
565 list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) {
566 if (ls->ls_stid.sc_client == clp)
567 nfsd4_return_all_layouts(ls, &reaplist);
568 }
569 spin_unlock(&fp->fi_lock);
570
571 nfsd4_free_layouts(&reaplist);
572}
573
574static void
575nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
576{
577 struct nfs4_client *clp = ls->ls_stid.sc_client;
578 char addr_str[INET6_ADDRSTRLEN];
579 static char *envp[] = {
580 "HOME=/",
581 "TERM=linux",
582 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
583 NULL
584 };
585 char *argv[8];
586 int error;
587
588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
589
590 nfsd4_cb_layout_fail(ls);
591
592 printk(KERN_WARNING
593 "nfsd: client %s failed to respond to layout recall. "
594 " Fencing..\n", addr_str);
595
596 argv[0] = "/sbin/nfsd-recall-failed";
597 argv[1] = addr_str;
598 argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
599 argv[3] = NULL;
600
601 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
602 if (error) {
603 printk(KERN_ERR "nfsd: fence failed for client %s: %d!\n",
604 addr_str, error);
605 }
606}
607
608static int
609nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
610{
611 struct nfs4_layout_stateid *ls =
612 container_of(cb, struct nfs4_layout_stateid, ls_recall);
613 LIST_HEAD(reaplist);
614
615 switch (task->tk_status) {
616 case 0:
617 return 1;
618 case -NFS4ERR_NOMATCHING_LAYOUT:
619 trace_layout_recall_done(&ls->ls_stid.sc_stateid);
620 task->tk_status = 0;
621 return 1;
622 case -NFS4ERR_DELAY:
623 /* Poll the client until it's done with the layout */
624 /* FIXME: cap number of retries.
625 * The pnfs standard states that we need to only expire
626 * the client after at-least "lease time" .eg lease-time * 2
627 * when failing to communicate a recall
628 */
629 rpc_delay(task, HZ/100); /* 10 mili-seconds */
630 return 0;
631 default:
632 /*
633 * Unknown error or non-responding client, we'll need to fence.
634 */
635 nfsd4_cb_layout_fail(ls);
636 return -1;
637 }
638}
639
640static void
641nfsd4_cb_layout_release(struct nfsd4_callback *cb)
642{
643 struct nfs4_layout_stateid *ls =
644 container_of(cb, struct nfs4_layout_stateid, ls_recall);
645 LIST_HEAD(reaplist);
646
647 trace_layout_recall_release(&ls->ls_stid.sc_stateid);
648
649 nfsd4_return_all_layouts(ls, &reaplist);
650 nfsd4_free_layouts(&reaplist);
651 nfs4_put_stid(&ls->ls_stid);
652}
653
654static struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
655 .done = nfsd4_cb_layout_done,
656 .release = nfsd4_cb_layout_release,
657};
658
659static bool
660nfsd4_layout_lm_break(struct file_lock *fl)
661{
662 /*
663 * We don't want the locks code to timeout the lease for us;
664 * we'll remove it ourself if a layout isn't returned
665 * in time:
666 */
667 fl->fl_break_time = 0;
668 nfsd4_recall_file_layout(fl->fl_owner);
669 return false;
670}
671
672static int
673nfsd4_layout_lm_change(struct file_lock *onlist, int arg,
674 struct list_head *dispose)
675{
676 BUG_ON(!(arg & F_UNLCK));
677 return lease_modify(onlist, arg, dispose);
678}
679
680static const struct lock_manager_operations nfsd4_layouts_lm_ops = {
681 .lm_break = nfsd4_layout_lm_break,
682 .lm_change = nfsd4_layout_lm_change,
683};
684
685int
686nfsd4_init_pnfs(void)
687{
688 int i;
689
690 for (i = 0; i < DEVID_HASH_SIZE; i++)
691 INIT_LIST_HEAD(&nfsd_devid_hash[i]);
692
693 nfs4_layout_cache = kmem_cache_create("nfs4_layout",
694 sizeof(struct nfs4_layout), 0, 0, NULL);
695 if (!nfs4_layout_cache)
696 return -ENOMEM;
697
698 nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
699 sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
700 if (!nfs4_layout_stateid_cache) {
701 kmem_cache_destroy(nfs4_layout_cache);
702 return -ENOMEM;
703 }
704 return 0;
705}
706
707void
708nfsd4_exit_pnfs(void)
709{
710 int i;
711
712 kmem_cache_destroy(nfs4_layout_cache);
713 kmem_cache_destroy(nfs4_layout_stateid_cache);
714
715 for (i = 0; i < DEVID_HASH_SIZE; i++) {
716 struct nfsd4_deviceid_map *map, *n;
717
718 list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash)
719 kfree(map);
720 }
721}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac71d13c69ef..d30bea8d0277 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -43,6 +43,8 @@
43#include "current_stateid.h" 43#include "current_stateid.h"
44#include "netns.h" 44#include "netns.h"
45#include "acl.h" 45#include "acl.h"
46#include "pnfs.h"
47#include "trace.h"
46 48
47#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 49#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
48#include <linux/security.h> 50#include <linux/security.h>
@@ -1178,6 +1180,259 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1178 return status == nfserr_same ? nfs_ok : status; 1180 return status == nfserr_same ? nfs_ok : status;
1179} 1181}
1180 1182
1183#ifdef CONFIG_NFSD_PNFS
1184static const struct nfsd4_layout_ops *
1185nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
1186{
1187 if (!exp->ex_layout_type) {
1188 dprintk("%s: export does not support pNFS\n", __func__);
1189 return NULL;
1190 }
1191
1192 if (exp->ex_layout_type != layout_type) {
1193 dprintk("%s: layout type %d not supported\n",
1194 __func__, layout_type);
1195 return NULL;
1196 }
1197
1198 return nfsd4_layout_ops[layout_type];
1199}
1200
1201static __be32
1202nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1203 struct nfsd4_compound_state *cstate,
1204 struct nfsd4_getdeviceinfo *gdp)
1205{
1206 const struct nfsd4_layout_ops *ops;
1207 struct nfsd4_deviceid_map *map;
1208 struct svc_export *exp;
1209 __be32 nfserr;
1210
1211 dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n",
1212 __func__,
1213 gdp->gd_layout_type,
1214 gdp->gd_devid.fsid_idx, gdp->gd_devid.generation,
1215 gdp->gd_maxcount);
1216
1217 map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx);
1218 if (!map) {
1219 dprintk("%s: couldn't find device ID to export mapping!\n",
1220 __func__);
1221 return nfserr_noent;
1222 }
1223
1224 exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
1225 if (IS_ERR(exp)) {
1226 dprintk("%s: could not find device id\n", __func__);
1227 return nfserr_noent;
1228 }
1229
1230 nfserr = nfserr_layoutunavailable;
1231 ops = nfsd4_layout_verify(exp, gdp->gd_layout_type);
1232 if (!ops)
1233 goto out;
1234
1235 nfserr = nfs_ok;
1236 if (gdp->gd_maxcount != 0)
1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1238
1239 gdp->gd_notify_types &= ops->notify_types;
1240 exp_put(exp);
1241out:
1242 return nfserr;
1243}
1244
1245static __be32
1246nfsd4_layoutget(struct svc_rqst *rqstp,
1247 struct nfsd4_compound_state *cstate,
1248 struct nfsd4_layoutget *lgp)
1249{
1250 struct svc_fh *current_fh = &cstate->current_fh;
1251 const struct nfsd4_layout_ops *ops;
1252 struct nfs4_layout_stateid *ls;
1253 __be32 nfserr;
1254 int accmode;
1255
1256 switch (lgp->lg_seg.iomode) {
1257 case IOMODE_READ:
1258 accmode = NFSD_MAY_READ;
1259 break;
1260 case IOMODE_RW:
1261 accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
1262 break;
1263 default:
1264 dprintk("%s: invalid iomode %d\n",
1265 __func__, lgp->lg_seg.iomode);
1266 nfserr = nfserr_badiomode;
1267 goto out;
1268 }
1269
1270 nfserr = fh_verify(rqstp, current_fh, 0, accmode);
1271 if (nfserr)
1272 goto out;
1273
1274 nfserr = nfserr_layoutunavailable;
1275 ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
1276 if (!ops)
1277 goto out;
1278
1279 /*
1280 * Verify minlength and range as per RFC5661:
1281 * o If loga_length is less than loga_minlength,
1282 * the metadata server MUST return NFS4ERR_INVAL.
1283 * o If the sum of loga_offset and loga_minlength exceeds
1284 * NFS4_UINT64_MAX, and loga_minlength is not
1285 * NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
1286 * o If the sum of loga_offset and loga_length exceeds
1287 * NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
1288 * the error NFS4ERR_INVAL MUST result.
1289 */
1290 nfserr = nfserr_inval;
1291 if (lgp->lg_seg.length < lgp->lg_minlength ||
1292 (lgp->lg_minlength != NFS4_MAX_UINT64 &&
1293 lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||
1294 (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
1295 lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
1296 goto out;
1297 if (lgp->lg_seg.length == 0)
1298 goto out;
1299
1300 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
1301 true, lgp->lg_layout_type, &ls);
1302 if (nfserr) {
1303 trace_layout_get_lookup_fail(&lgp->lg_sid);
1304 goto out;
1305 }
1306
1307 nfserr = nfserr_recallconflict;
1308 if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
1309 goto out_put_stid;
1310
1311 nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
1312 current_fh, lgp);
1313 if (nfserr)
1314 goto out_put_stid;
1315
1316 nfserr = nfsd4_insert_layout(lgp, ls);
1317
1318out_put_stid:
1319 nfs4_put_stid(&ls->ls_stid);
1320out:
1321 return nfserr;
1322}
1323
1324static __be32
1325nfsd4_layoutcommit(struct svc_rqst *rqstp,
1326 struct nfsd4_compound_state *cstate,
1327 struct nfsd4_layoutcommit *lcp)
1328{
1329 const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
1330 struct svc_fh *current_fh = &cstate->current_fh;
1331 const struct nfsd4_layout_ops *ops;
1332 loff_t new_size = lcp->lc_last_wr + 1;
1333 struct inode *inode;
1334 struct nfs4_layout_stateid *ls;
1335 __be32 nfserr;
1336
1337 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
1338 if (nfserr)
1339 goto out;
1340
1341 nfserr = nfserr_layoutunavailable;
1342 ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
1343 if (!ops)
1344 goto out;
1345 inode = current_fh->fh_dentry->d_inode;
1346
1347 nfserr = nfserr_inval;
1348 if (new_size <= seg->offset) {
1349 dprintk("pnfsd: last write before layout segment\n");
1350 goto out;
1351 }
1352 if (new_size > seg->offset + seg->length) {
1353 dprintk("pnfsd: last write beyond layout segment\n");
1354 goto out;
1355 }
1356 if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
1357 dprintk("pnfsd: layoutcommit beyond EOF\n");
1358 goto out;
1359 }
1360
1361 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
1362 false, lcp->lc_layout_type,
1363 &ls);
1364 if (nfserr) {
1365 trace_layout_commit_lookup_fail(&lcp->lc_sid);
1366 /* fixup error code as per RFC5661 */
1367 if (nfserr == nfserr_bad_stateid)
1368 nfserr = nfserr_badlayout;
1369 goto out;
1370 }
1371
1372 nfserr = ops->proc_layoutcommit(inode, lcp);
1373 if (nfserr)
1374 goto out_put_stid;
1375
1376 if (new_size > i_size_read(inode)) {
1377 lcp->lc_size_chg = 1;
1378 lcp->lc_newsize = new_size;
1379 } else {
1380 lcp->lc_size_chg = 0;
1381 }
1382
1383out_put_stid:
1384 nfs4_put_stid(&ls->ls_stid);
1385out:
1386 return nfserr;
1387}
1388
1389static __be32
1390nfsd4_layoutreturn(struct svc_rqst *rqstp,
1391 struct nfsd4_compound_state *cstate,
1392 struct nfsd4_layoutreturn *lrp)
1393{
1394 struct svc_fh *current_fh = &cstate->current_fh;
1395 __be32 nfserr;
1396
1397 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
1398 if (nfserr)
1399 goto out;
1400
1401 nfserr = nfserr_layoutunavailable;
1402 if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
1403 goto out;
1404
1405 switch (lrp->lr_seg.iomode) {
1406 case IOMODE_READ:
1407 case IOMODE_RW:
1408 case IOMODE_ANY:
1409 break;
1410 default:
1411 dprintk("%s: invalid iomode %d\n", __func__,
1412 lrp->lr_seg.iomode);
1413 nfserr = nfserr_inval;
1414 goto out;
1415 }
1416
1417 switch (lrp->lr_return_type) {
1418 case RETURN_FILE:
1419 nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp);
1420 break;
1421 case RETURN_FSID:
1422 case RETURN_ALL:
1423 nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp);
1424 break;
1425 default:
1426 dprintk("%s: invalid return_type %d\n", __func__,
1427 lrp->lr_return_type);
1428 nfserr = nfserr_inval;
1429 break;
1430 }
1431out:
1432 return nfserr;
1433}
1434#endif /* CONFIG_NFSD_PNFS */
1435
1181/* 1436/*
1182 * NULL call. 1437 * NULL call.
1183 */ 1438 */
@@ -1679,6 +1934,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
1679 op_encode_channel_attrs_maxsz) * sizeof(__be32); 1934 op_encode_channel_attrs_maxsz) * sizeof(__be32);
1680} 1935}
1681 1936
1937#ifdef CONFIG_NFSD_PNFS
1938/*
1939 * At this stage we don't really know what layout driver will handle the request,
1940 * so we need to define an arbitrary upper bound here.
1941 */
1942#define MAX_LAYOUT_SIZE 128
1943static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1944{
1945 return (op_encode_hdr_size +
1946 1 /* logr_return_on_close */ +
1947 op_encode_stateid_maxsz +
1948 1 /* nr of layouts */ +
1949 MAX_LAYOUT_SIZE) * sizeof(__be32);
1950}
1951
1952static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1953{
1954 return (op_encode_hdr_size +
1955 1 /* locr_newsize */ +
1956 2 /* ns_size */) * sizeof(__be32);
1957}
1958
1959static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1960{
1961 return (op_encode_hdr_size +
1962 1 /* lrs_stateid */ +
1963 op_encode_stateid_maxsz) * sizeof(__be32);
1964}
1965#endif /* CONFIG_NFSD_PNFS */
1966
1682static struct nfsd4_operation nfsd4_ops[] = { 1967static struct nfsd4_operation nfsd4_ops[] = {
1683 [OP_ACCESS] = { 1968 [OP_ACCESS] = {
1684 .op_func = (nfsd4op_func)nfsd4_access, 1969 .op_func = (nfsd4op_func)nfsd4_access,
@@ -1966,6 +2251,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
1966 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 2251 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
1967 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2252 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1968 }, 2253 },
2254#ifdef CONFIG_NFSD_PNFS
2255 [OP_GETDEVICEINFO] = {
2256 .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
2257 .op_flags = ALLOWED_WITHOUT_FH,
2258 .op_name = "OP_GETDEVICEINFO",
2259 },
2260 [OP_LAYOUTGET] = {
2261 .op_func = (nfsd4op_func)nfsd4_layoutget,
2262 .op_flags = OP_MODIFIES_SOMETHING,
2263 .op_name = "OP_LAYOUTGET",
2264 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
2265 },
2266 [OP_LAYOUTCOMMIT] = {
2267 .op_func = (nfsd4op_func)nfsd4_layoutcommit,
2268 .op_flags = OP_MODIFIES_SOMETHING,
2269 .op_name = "OP_LAYOUTCOMMIT",
2270 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
2271 },
2272 [OP_LAYOUTRETURN] = {
2273 .op_func = (nfsd4op_func)nfsd4_layoutreturn,
2274 .op_flags = OP_MODIFIES_SOMETHING,
2275 .op_name = "OP_LAYOUTRETURN",
2276 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
2277 },
2278#endif /* CONFIG_NFSD_PNFS */
1969 2279
1970 /* NFSv4.2 operations */ 2280 /* NFSv4.2 operations */
1971 [OP_ALLOCATE] = { 2281 [OP_ALLOCATE] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c06a1ba80d73..f6b2a09f793f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
48#include "current_stateid.h" 48#include "current_stateid.h"
49 49
50#include "netns.h" 50#include "netns.h"
51#include "pnfs.h"
51 52
52#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
53 54
@@ -150,16 +151,6 @@ renew_client_locked(struct nfs4_client *clp)
150 clp->cl_time = get_seconds(); 151 clp->cl_time = get_seconds();
151} 152}
152 153
153static inline void
154renew_client(struct nfs4_client *clp)
155{
156 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
157
158 spin_lock(&nn->client_lock);
159 renew_client_locked(clp);
160 spin_unlock(&nn->client_lock);
161}
162
163static void put_client_renew_locked(struct nfs4_client *clp) 154static void put_client_renew_locked(struct nfs4_client *clp)
164{ 155{
165 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 156 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -282,7 +273,7 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
282 kmem_cache_free(file_slab, fp); 273 kmem_cache_free(file_slab, fp);
283} 274}
284 275
285static inline void 276void
286put_nfs4_file(struct nfs4_file *fi) 277put_nfs4_file(struct nfs4_file *fi)
287{ 278{
288 might_lock(&state_lock); 279 might_lock(&state_lock);
@@ -295,12 +286,6 @@ put_nfs4_file(struct nfs4_file *fi)
295 } 286 }
296} 287}
297 288
298static inline void
299get_nfs4_file(struct nfs4_file *fi)
300{
301 atomic_inc(&fi->fi_ref);
302}
303
304static struct file * 289static struct file *
305__nfs4_get_fd(struct nfs4_file *f, int oflag) 290__nfs4_get_fd(struct nfs4_file *f, int oflag)
306{ 291{
@@ -358,7 +343,7 @@ find_readable_file(struct nfs4_file *f)
358 return ret; 343 return ret;
359} 344}
360 345
361static struct file * 346struct file *
362find_any_file(struct nfs4_file *f) 347find_any_file(struct nfs4_file *f)
363{ 348{
364 struct file *ret; 349 struct file *ret;
@@ -408,14 +393,6 @@ static unsigned int file_hashval(struct knfsd_fh *fh)
408 return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); 393 return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
409} 394}
410 395
411static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
412{
413 return fh1->fh_size == fh2->fh_size &&
414 !memcmp(fh1->fh_base.fh_pad,
415 fh2->fh_base.fh_pad,
416 fh1->fh_size);
417}
418
419static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; 396static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
420 397
421static void 398static void
@@ -494,7 +471,7 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
494 __nfs4_file_put_access(fp, O_RDONLY); 471 __nfs4_file_put_access(fp, O_RDONLY);
495} 472}
496 473
497static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, 474struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
498 struct kmem_cache *slab) 475 struct kmem_cache *slab)
499{ 476{
500 struct nfs4_stid *stid; 477 struct nfs4_stid *stid;
@@ -688,17 +665,17 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
688 struct file *filp = NULL; 665 struct file *filp = NULL;
689 666
690 spin_lock(&fp->fi_lock); 667 spin_lock(&fp->fi_lock);
691 if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees)) 668 if (fp->fi_deleg_file && --fp->fi_delegees == 0)
692 swap(filp, fp->fi_deleg_file); 669 swap(filp, fp->fi_deleg_file);
693 spin_unlock(&fp->fi_lock); 670 spin_unlock(&fp->fi_lock);
694 671
695 if (filp) { 672 if (filp) {
696 vfs_setlease(filp, F_UNLCK, NULL, NULL); 673 vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
697 fput(filp); 674 fput(filp);
698 } 675 }
699} 676}
700 677
701static void unhash_stid(struct nfs4_stid *s) 678void nfs4_unhash_stid(struct nfs4_stid *s)
702{ 679{
703 s->sc_type = 0; 680 s->sc_type = 0;
704} 681}
@@ -1006,7 +983,7 @@ static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
1006 983
1007 list_del_init(&stp->st_locks); 984 list_del_init(&stp->st_locks);
1008 unhash_ol_stateid(stp); 985 unhash_ol_stateid(stp);
1009 unhash_stid(&stp->st_stid); 986 nfs4_unhash_stid(&stp->st_stid);
1010} 987}
1011 988
1012static void release_lock_stateid(struct nfs4_ol_stateid *stp) 989static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@@ -1518,7 +1495,12 @@ unhash_session(struct nfsd4_session *ses)
1518static int 1495static int
1519STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) 1496STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
1520{ 1497{
1521 if (clid->cl_boot == nn->boot_time) 1498 /*
1499 * We're assuming the clid was not given out from a boot
1500 * precisely 2^32 (about 136 years) before this one. That seems
1501 * a safe assumption:
1502 */
1503 if (clid->cl_boot == (u32)nn->boot_time)
1522 return 0; 1504 return 0;
1523 dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", 1505 dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
1524 clid->cl_boot, clid->cl_id, nn->boot_time); 1506 clid->cl_boot, clid->cl_id, nn->boot_time);
@@ -1558,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1558 INIT_LIST_HEAD(&clp->cl_lru); 1540 INIT_LIST_HEAD(&clp->cl_lru);
1559 INIT_LIST_HEAD(&clp->cl_callbacks); 1541 INIT_LIST_HEAD(&clp->cl_callbacks);
1560 INIT_LIST_HEAD(&clp->cl_revoked); 1542 INIT_LIST_HEAD(&clp->cl_revoked);
1543#ifdef CONFIG_NFSD_PNFS
1544 INIT_LIST_HEAD(&clp->cl_lo_states);
1545#endif
1561 spin_lock_init(&clp->cl_lock); 1546 spin_lock_init(&clp->cl_lock);
1562 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1547 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1563 return clp; 1548 return clp;
@@ -1662,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp)
1662 nfs4_get_stateowner(&oo->oo_owner); 1647 nfs4_get_stateowner(&oo->oo_owner);
1663 release_openowner(oo); 1648 release_openowner(oo);
1664 } 1649 }
1650 nfsd4_return_all_client_layouts(clp);
1665 nfsd4_shutdown_callback(clp); 1651 nfsd4_shutdown_callback(clp);
1666 if (clp->cl_cb_conn.cb_xprt) 1652 if (clp->cl_cb_conn.cb_xprt)
1667 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 1653 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -2145,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
2145static void 2131static void
2146nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) 2132nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
2147{ 2133{
2148 /* pNFS is not supported */ 2134#ifdef CONFIG_NFSD_PNFS
2135 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
2136#else
2149 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; 2137 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
2138#endif
2150 2139
2151 /* Referrals are supported, Migration is not. */ 2140 /* Referrals are supported, Migration is not. */
2152 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; 2141 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
@@ -3074,6 +3063,10 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3074 fp->fi_share_deny = 0; 3063 fp->fi_share_deny = 0;
3075 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 3064 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
3076 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 3065 memset(fp->fi_access, 0, sizeof(fp->fi_access));
3066#ifdef CONFIG_NFSD_PNFS
3067 INIT_LIST_HEAD(&fp->fi_lo_states);
3068 atomic_set(&fp->fi_lo_recalls, 0);
3069#endif
3077 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); 3070 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
3078} 3071}
3079 3072
@@ -3300,7 +3293,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
3300 struct nfs4_file *fp; 3293 struct nfs4_file *fp;
3301 3294
3302 hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { 3295 hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
3303 if (nfsd_fh_match(&fp->fi_fhandle, fh)) { 3296 if (fh_match(&fp->fi_fhandle, fh)) {
3304 if (atomic_inc_not_zero(&fp->fi_ref)) 3297 if (atomic_inc_not_zero(&fp->fi_ref))
3305 return fp; 3298 return fp;
3306 } 3299 }
@@ -3308,7 +3301,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
3308 return NULL; 3301 return NULL;
3309} 3302}
3310 3303
3311static struct nfs4_file * 3304struct nfs4_file *
3312find_file(struct knfsd_fh *fh) 3305find_file(struct knfsd_fh *fh)
3313{ 3306{
3314 struct nfs4_file *fp; 3307 struct nfs4_file *fp;
@@ -3477,7 +3470,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
3477} 3470}
3478 3471
3479static int 3472static int
3480nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) 3473nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
3474 struct list_head *dispose)
3481{ 3475{
3482 if (arg & F_UNLCK) 3476 if (arg & F_UNLCK)
3483 return lease_modify(onlist, arg, dispose); 3477 return lease_modify(onlist, arg, dispose);
@@ -3855,12 +3849,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
3855 /* Race breaker */ 3849 /* Race breaker */
3856 if (fp->fi_deleg_file) { 3850 if (fp->fi_deleg_file) {
3857 status = 0; 3851 status = 0;
3858 atomic_inc(&fp->fi_delegees); 3852 ++fp->fi_delegees;
3859 hash_delegation_locked(dp, fp); 3853 hash_delegation_locked(dp, fp);
3860 goto out_unlock; 3854 goto out_unlock;
3861 } 3855 }
3862 fp->fi_deleg_file = filp; 3856 fp->fi_deleg_file = filp;
3863 atomic_set(&fp->fi_delegees, 1); 3857 fp->fi_delegees = 1;
3864 hash_delegation_locked(dp, fp); 3858 hash_delegation_locked(dp, fp);
3865 spin_unlock(&fp->fi_lock); 3859 spin_unlock(&fp->fi_lock);
3866 spin_unlock(&state_lock); 3860 spin_unlock(&state_lock);
@@ -3901,7 +3895,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3901 status = -EAGAIN; 3895 status = -EAGAIN;
3902 goto out_unlock; 3896 goto out_unlock;
3903 } 3897 }
3904 atomic_inc(&fp->fi_delegees); 3898 ++fp->fi_delegees;
3905 hash_delegation_locked(dp, fp); 3899 hash_delegation_locked(dp, fp);
3906 status = 0; 3900 status = 0;
3907out_unlock: 3901out_unlock:
@@ -4294,7 +4288,7 @@ laundromat_main(struct work_struct *laundry)
4294 4288
4295static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) 4289static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
4296{ 4290{
4297 if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) 4291 if (!fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
4298 return nfserr_bad_stateid; 4292 return nfserr_bad_stateid;
4299 return nfs_ok; 4293 return nfs_ok;
4300} 4294}
@@ -4445,7 +4439,7 @@ out_unlock:
4445 return status; 4439 return status;
4446} 4440}
4447 4441
4448static __be32 4442__be32
4449nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 4443nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
4450 stateid_t *stateid, unsigned char typemask, 4444 stateid_t *stateid, unsigned char typemask,
4451 struct nfs4_stid **s, struct nfsd_net *nn) 4445 struct nfs4_stid **s, struct nfsd_net *nn)
@@ -4859,6 +4853,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4859 update_stateid(&stp->st_stid.sc_stateid); 4853 update_stateid(&stp->st_stid.sc_stateid);
4860 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4854 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4861 4855
4856 nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
4857 stp->st_stid.sc_file);
4858
4862 nfsd4_close_open_stateid(stp); 4859 nfsd4_close_open_stateid(stp);
4863 4860
4864 /* put reference from nfs4_preprocess_seqid_op */ 4861 /* put reference from nfs4_preprocess_seqid_op */
@@ -5556,10 +5553,11 @@ out_nfserr:
5556static bool 5553static bool
5557check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) 5554check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
5558{ 5555{
5559 struct file_lock **flpp; 5556 struct file_lock *fl;
5560 int status = false; 5557 int status = false;
5561 struct file *filp = find_any_file(fp); 5558 struct file *filp = find_any_file(fp);
5562 struct inode *inode; 5559 struct inode *inode;
5560 struct file_lock_context *flctx;
5563 5561
5564 if (!filp) { 5562 if (!filp) {
5565 /* Any valid lock stateid should have some sort of access */ 5563 /* Any valid lock stateid should have some sort of access */
@@ -5568,15 +5566,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
5568 } 5566 }
5569 5567
5570 inode = file_inode(filp); 5568 inode = file_inode(filp);
5569 flctx = inode->i_flctx;
5571 5570
5572 spin_lock(&inode->i_lock); 5571 if (flctx && !list_empty_careful(&flctx->flc_posix)) {
5573 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { 5572 spin_lock(&flctx->flc_lock);
5574 if ((*flpp)->fl_owner == (fl_owner_t)lowner) { 5573 list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
5575 status = true; 5574 if (fl->fl_owner == (fl_owner_t)lowner) {
5576 break; 5575 status = true;
5576 break;
5577 }
5577 } 5578 }
5579 spin_unlock(&flctx->flc_lock);
5578 } 5580 }
5579 spin_unlock(&inode->i_lock);
5580 fput(filp); 5581 fput(filp);
5581 return status; 5582 return status;
5582} 5583}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 15f7b73e0c0f..df5e66caf100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -47,6 +47,7 @@
47#include "state.h" 47#include "state.h"
48#include "cache.h" 48#include "cache.h"
49#include "netns.h" 49#include "netns.h"
50#include "pnfs.h"
50 51
51#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 52#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
52#include <linux/security.h> 53#include <linux/security.h>
@@ -234,6 +235,26 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
234 return ret; 235 return ret;
235} 236}
236 237
238/*
239 * We require the high 32 bits of 'seconds' to be 0, and
240 * we ignore all 32 bits of 'nseconds'.
241 */
242static __be32
243nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
244{
245 DECODE_HEAD;
246 u64 sec;
247
248 READ_BUF(12);
249 p = xdr_decode_hyper(p, &sec);
250 tv->tv_sec = sec;
251 tv->tv_nsec = be32_to_cpup(p++);
252 if (tv->tv_nsec >= (u32)1000000000)
253 return nfserr_inval;
254
255 DECODE_TAIL;
256}
257
237static __be32 258static __be32
238nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) 259nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
239{ 260{
@@ -267,7 +288,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
267{ 288{
268 int expected_len, len = 0; 289 int expected_len, len = 0;
269 u32 dummy32; 290 u32 dummy32;
270 u64 sec;
271 char *buf; 291 char *buf;
272 292
273 DECODE_HEAD; 293 DECODE_HEAD;
@@ -358,15 +378,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
358 dummy32 = be32_to_cpup(p++); 378 dummy32 = be32_to_cpup(p++);
359 switch (dummy32) { 379 switch (dummy32) {
360 case NFS4_SET_TO_CLIENT_TIME: 380 case NFS4_SET_TO_CLIENT_TIME:
361 /* We require the high 32 bits of 'seconds' to be 0, and we ignore
362 all 32 bits of 'nseconds'. */
363 READ_BUF(12);
364 len += 12; 381 len += 12;
365 p = xdr_decode_hyper(p, &sec); 382 status = nfsd4_decode_time(argp, &iattr->ia_atime);
366 iattr->ia_atime.tv_sec = (time_t)sec; 383 if (status)
367 iattr->ia_atime.tv_nsec = be32_to_cpup(p++); 384 return status;
368 if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
369 return nfserr_inval;
370 iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); 385 iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
371 break; 386 break;
372 case NFS4_SET_TO_SERVER_TIME: 387 case NFS4_SET_TO_SERVER_TIME:
@@ -382,15 +397,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
382 dummy32 = be32_to_cpup(p++); 397 dummy32 = be32_to_cpup(p++);
383 switch (dummy32) { 398 switch (dummy32) {
384 case NFS4_SET_TO_CLIENT_TIME: 399 case NFS4_SET_TO_CLIENT_TIME:
385 /* We require the high 32 bits of 'seconds' to be 0, and we ignore
386 all 32 bits of 'nseconds'. */
387 READ_BUF(12);
388 len += 12; 400 len += 12;
389 p = xdr_decode_hyper(p, &sec); 401 status = nfsd4_decode_time(argp, &iattr->ia_mtime);
390 iattr->ia_mtime.tv_sec = sec; 402 if (status)
391 iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); 403 return status;
392 if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
393 return nfserr_inval;
394 iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); 404 iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
395 break; 405 break;
396 case NFS4_SET_TO_SERVER_TIME: 406 case NFS4_SET_TO_SERVER_TIME:
@@ -1513,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
1513 DECODE_TAIL; 1523 DECODE_TAIL;
1514} 1524}
1515 1525
1526#ifdef CONFIG_NFSD_PNFS
1527static __be32
1528nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
1529 struct nfsd4_getdeviceinfo *gdev)
1530{
1531 DECODE_HEAD;
1532 u32 num, i;
1533
1534 READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
1535 COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
1536 gdev->gd_layout_type = be32_to_cpup(p++);
1537 gdev->gd_maxcount = be32_to_cpup(p++);
1538 num = be32_to_cpup(p++);
1539 if (num) {
1540 READ_BUF(4 * num);
1541 gdev->gd_notify_types = be32_to_cpup(p++);
1542 for (i = 1; i < num; i++) {
1543 if (be32_to_cpup(p++)) {
1544 status = nfserr_inval;
1545 goto out;
1546 }
1547 }
1548 }
1549 DECODE_TAIL;
1550}
1551
1552static __be32
1553nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1554 struct nfsd4_layoutget *lgp)
1555{
1556 DECODE_HEAD;
1557
1558 READ_BUF(36);
1559 lgp->lg_signal = be32_to_cpup(p++);
1560 lgp->lg_layout_type = be32_to_cpup(p++);
1561 lgp->lg_seg.iomode = be32_to_cpup(p++);
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid);
1566 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++);
1568
1569 DECODE_TAIL;
1570}
1571
1572static __be32
1573nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1574 struct nfsd4_layoutcommit *lcp)
1575{
1576 DECODE_HEAD;
1577 u32 timechange;
1578
1579 READ_BUF(20);
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid);
1584 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) {
1587 READ_BUF(8);
1588 p = xdr_decode_hyper(p, &lcp->lc_last_wr);
1589 } else
1590 lcp->lc_last_wr = 0;
1591 READ_BUF(4);
1592 timechange = be32_to_cpup(p++);
1593 if (timechange) {
1594 status = nfsd4_decode_time(argp, &lcp->lc_mtime);
1595 if (status)
1596 return status;
1597 } else {
1598 lcp->lc_mtime.tv_nsec = UTIME_NOW;
1599 }
1600 READ_BUF(8);
1601 lcp->lc_layout_type = be32_to_cpup(p++);
1602
1603 /*
1604 * Save the layout update in XDR format and let the layout driver deal
1605 * with it later.
1606 */
1607 lcp->lc_up_len = be32_to_cpup(p++);
1608 if (lcp->lc_up_len > 0) {
1609 READ_BUF(lcp->lc_up_len);
1610 READMEM(lcp->lc_up_layout, lcp->lc_up_len);
1611 }
1612
1613 DECODE_TAIL;
1614}
1615
1616static __be32
1617nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1618 struct nfsd4_layoutreturn *lrp)
1619{
1620 DECODE_HEAD;
1621
1622 READ_BUF(16);
1623 lrp->lr_reclaim = be32_to_cpup(p++);
1624 lrp->lr_layout_type = be32_to_cpup(p++);
1625 lrp->lr_seg.iomode = be32_to_cpup(p++);
1626 lrp->lr_return_type = be32_to_cpup(p++);
1627 if (lrp->lr_return_type == RETURN_FILE) {
1628 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid);
1632 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) {
1635 READ_BUF(lrp->lrf_body_len);
1636 READMEM(lrp->lrf_body, lrp->lrf_body_len);
1637 }
1638 } else {
1639 lrp->lr_seg.offset = 0;
1640 lrp->lr_seg.length = NFS4_MAX_UINT64;
1641 }
1642
1643 DECODE_TAIL;
1644}
1645#endif /* CONFIG_NFSD_PNFS */
1646
1516static __be32 1647static __be32
1517nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, 1648nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
1518 struct nfsd4_fallocate *fallocate) 1649 struct nfsd4_fallocate *fallocate)
@@ -1607,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1607 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, 1738 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
1608 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, 1739 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
1609 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1740 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1741#ifdef CONFIG_NFSD_PNFS
1742 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
1743 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1744 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
1745 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
1746 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
1747#else
1610 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, 1748 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
1611 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, 1749 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1612 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, 1750 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
1613 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, 1751 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
1614 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, 1752 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
1753#endif
1615 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, 1754 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
1616 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, 1755 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
1617 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, 1756 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2539,6 +2678,30 @@ out_acl:
2539 get_parent_attributes(exp, &stat); 2678 get_parent_attributes(exp, &stat);
2540 p = xdr_encode_hyper(p, stat.ino); 2679 p = xdr_encode_hyper(p, stat.ino);
2541 } 2680 }
2681#ifdef CONFIG_NFSD_PNFS
2682 if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
2683 (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
2684 if (exp->ex_layout_type) {
2685 p = xdr_reserve_space(xdr, 8);
2686 if (!p)
2687 goto out_resource;
2688 *p++ = cpu_to_be32(1);
2689 *p++ = cpu_to_be32(exp->ex_layout_type);
2690 } else {
2691 p = xdr_reserve_space(xdr, 4);
2692 if (!p)
2693 goto out_resource;
2694 *p++ = cpu_to_be32(0);
2695 }
2696 }
2697
2698 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
2699 p = xdr_reserve_space(xdr, 4);
2700 if (!p)
2701 goto out_resource;
2702 *p++ = cpu_to_be32(stat.blksize);
2703 }
2704#endif /* CONFIG_NFSD_PNFS */
2542 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { 2705 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
2543 status = nfsd4_encode_security_label(xdr, rqstp, context, 2706 status = nfsd4_encode_security_label(xdr, rqstp, context,
2544 contextlen); 2707 contextlen);
@@ -2768,16 +2931,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2768 if (entry_bytes > cd->rd_maxcount) 2931 if (entry_bytes > cd->rd_maxcount)
2769 goto fail; 2932 goto fail;
2770 cd->rd_maxcount -= entry_bytes; 2933 cd->rd_maxcount -= entry_bytes;
2771 if (!cd->rd_dircount)
2772 goto fail;
2773 /* 2934 /*
2774 * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so 2935 * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
2775 * let's always let through the first entry, at least: 2936 * let's always let through the first entry, at least:
2776 */ 2937 */
2777 name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; 2938 if (!cd->rd_dircount)
2939 goto fail;
2940 name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
2778 if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) 2941 if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
2779 goto fail; 2942 goto fail;
2780 cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); 2943 cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
2944
2781 cd->cookie_offset = cookie_offset; 2945 cd->cookie_offset = cookie_offset;
2782skip_entry: 2946skip_entry:
2783 cd->common.err = nfs_ok; 2947 cd->common.err = nfs_ok;
@@ -3814,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3814 return nfserr; 3978 return nfserr;
3815} 3979}
3816 3980
3981#ifdef CONFIG_NFSD_PNFS
3982static __be32
3983nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
3984 struct nfsd4_getdeviceinfo *gdev)
3985{
3986 struct xdr_stream *xdr = &resp->xdr;
3987 const struct nfsd4_layout_ops *ops =
3988 nfsd4_layout_ops[gdev->gd_layout_type];
3989 u32 starting_len = xdr->buf->len, needed_len;
3990 __be32 *p;
3991
3992 dprintk("%s: err %d\n", __func__, nfserr);
3993 if (nfserr)
3994 goto out;
3995
3996 nfserr = nfserr_resource;
3997 p = xdr_reserve_space(xdr, 4);
3998 if (!p)
3999 goto out;
4000
4001 *p++ = cpu_to_be32(gdev->gd_layout_type);
4002
4003 /* If maxcount is 0 then just update notifications */
4004 if (gdev->gd_maxcount != 0) {
4005 nfserr = ops->encode_getdeviceinfo(xdr, gdev);
4006 if (nfserr) {
4007 /*
4008 * We don't bother to burden the layout drivers with
4009 * enforcing gd_maxcount, just tell the client to
4010 * come back with a bigger buffer if it's not enough.
4011 */
4012 if (xdr->buf->len + 4 > gdev->gd_maxcount)
4013 goto toosmall;
4014 goto out;
4015 }
4016 }
4017
4018 nfserr = nfserr_resource;
4019 if (gdev->gd_notify_types) {
4020 p = xdr_reserve_space(xdr, 4 + 4);
4021 if (!p)
4022 goto out;
4023 *p++ = cpu_to_be32(1); /* bitmap length */
4024 *p++ = cpu_to_be32(gdev->gd_notify_types);
4025 } else {
4026 p = xdr_reserve_space(xdr, 4);
4027 if (!p)
4028 goto out;
4029 *p++ = 0;
4030 }
4031
4032 nfserr = 0;
4033out:
4034 kfree(gdev->gd_device);
4035 dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
4036 return nfserr;
4037
4038toosmall:
4039 dprintk("%s: maxcount too small\n", __func__);
4040 needed_len = xdr->buf->len + 4 /* notifications */;
4041 xdr_truncate_encode(xdr, starting_len);
4042 p = xdr_reserve_space(xdr, 4);
4043 if (!p) {
4044 nfserr = nfserr_resource;
4045 } else {
4046 *p++ = cpu_to_be32(needed_len);
4047 nfserr = nfserr_toosmall;
4048 }
4049 goto out;
4050}
4051
4052static __be32
4053nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
4054 struct nfsd4_layoutget *lgp)
4055{
4056 struct xdr_stream *xdr = &resp->xdr;
4057 const struct nfsd4_layout_ops *ops =
4058 nfsd4_layout_ops[lgp->lg_layout_type];
4059 __be32 *p;
4060
4061 dprintk("%s: err %d\n", __func__, nfserr);
4062 if (nfserr)
4063 goto out;
4064
4065 nfserr = nfserr_resource;
4066 p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
4067 if (!p)
4068 goto out;
4069
4070 *p++ = cpu_to_be32(1); /* we always set return-on-close */
4071 *p++ = cpu_to_be32(lgp->lg_sid.si_generation);
4072 p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
4073 sizeof(stateid_opaque_t));
4074
4075 *p++ = cpu_to_be32(1); /* we always return a single layout */
4076 p = xdr_encode_hyper(p, lgp->lg_seg.offset);
4077 p = xdr_encode_hyper(p, lgp->lg_seg.length);
4078 *p++ = cpu_to_be32(lgp->lg_seg.iomode);
4079 *p++ = cpu_to_be32(lgp->lg_layout_type);
4080
4081 nfserr = ops->encode_layoutget(xdr, lgp);
4082out:
4083 kfree(lgp->lg_content);
4084 return nfserr;
4085}
4086
4087static __be32
4088nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
4089 struct nfsd4_layoutcommit *lcp)
4090{
4091 struct xdr_stream *xdr = &resp->xdr;
4092 __be32 *p;
4093
4094 if (nfserr)
4095 return nfserr;
4096
4097 p = xdr_reserve_space(xdr, 4);
4098 if (!p)
4099 return nfserr_resource;
4100 *p++ = cpu_to_be32(lcp->lc_size_chg);
4101 if (lcp->lc_size_chg) {
4102 p = xdr_reserve_space(xdr, 8);
4103 if (!p)
4104 return nfserr_resource;
4105 p = xdr_encode_hyper(p, lcp->lc_newsize);
4106 }
4107
4108 return nfs_ok;
4109}
4110
4111static __be32
4112nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4113 struct nfsd4_layoutreturn *lrp)
4114{
4115 struct xdr_stream *xdr = &resp->xdr;
4116 __be32 *p;
4117
4118 if (nfserr)
4119 return nfserr;
4120
4121 p = xdr_reserve_space(xdr, 4);
4122 if (!p)
4123 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok;
4128}
4129#endif /* CONFIG_NFSD_PNFS */
4130
3817static __be32 4131static __be32
3818nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, 4132nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
3819 struct nfsd4_seek *seek) 4133 struct nfsd4_seek *seek)
@@ -3890,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3890 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, 4204 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
3891 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, 4205 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3892 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 4206 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
4207#ifdef CONFIG_NFSD_PNFS
4208 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
4209 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
4210 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
4211 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
4212 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
4213#else
3893 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, 4214 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
3894 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, 4215 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
3895 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, 4216 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
3896 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, 4217 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
3897 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, 4218 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
4219#endif
3898 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, 4220 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
3899 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, 4221 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
3900 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, 4222 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 19ace74d35f6..aa47d75ddb26 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -21,6 +21,7 @@
21#include "cache.h" 21#include "cache.h"
22#include "state.h" 22#include "state.h"
23#include "netns.h" 23#include "netns.h"
24#include "pnfs.h"
24 25
25/* 26/*
26 * We have a single directory with several nodes in it. 27 * We have a single directory with several nodes in it.
@@ -1258,9 +1259,12 @@ static int __init init_nfsd(void)
1258 retval = nfsd4_init_slabs(); 1259 retval = nfsd4_init_slabs();
1259 if (retval) 1260 if (retval)
1260 goto out_unregister_pernet; 1261 goto out_unregister_pernet;
1261 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ 1262 retval = nfsd4_init_pnfs();
1262 if (retval) 1263 if (retval)
1263 goto out_free_slabs; 1264 goto out_free_slabs;
1265 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
1266 if (retval)
1267 goto out_exit_pnfs;
1264 nfsd_stat_init(); /* Statistics */ 1268 nfsd_stat_init(); /* Statistics */
1265 retval = nfsd_reply_cache_init(); 1269 retval = nfsd_reply_cache_init();
1266 if (retval) 1270 if (retval)
@@ -1282,6 +1286,8 @@ out_free_lockd:
1282out_free_stat: 1286out_free_stat:
1283 nfsd_stat_shutdown(); 1287 nfsd_stat_shutdown();
1284 nfsd_fault_inject_cleanup(); 1288 nfsd_fault_inject_cleanup();
1289out_exit_pnfs:
1290 nfsd4_exit_pnfs();
1285out_free_slabs: 1291out_free_slabs:
1286 nfsd4_free_slabs(); 1292 nfsd4_free_slabs();
1287out_unregister_pernet: 1293out_unregister_pernet:
@@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void)
1299 nfsd_stat_shutdown(); 1305 nfsd_stat_shutdown();
1300 nfsd_lockd_shutdown(); 1306 nfsd_lockd_shutdown();
1301 nfsd4_free_slabs(); 1307 nfsd4_free_slabs();
1308 nfsd4_exit_pnfs();
1302 nfsd_fault_inject_cleanup(); 1309 nfsd_fault_inject_cleanup();
1303 unregister_filesystem(&nfsd_fs_type); 1310 unregister_filesystem(&nfsd_fs_type);
1304 unregister_pernet_subsys(&nfsd_net_ops); 1311 unregister_pernet_subsys(&nfsd_net_ops);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 33a46a8dfaf7..565c4da1a9eb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -325,15 +325,27 @@ void nfsd_lockd_shutdown(void);
325 325
326#define NFSD4_SUPPORTED_ATTRS_WORD2 0 326#define NFSD4_SUPPORTED_ATTRS_WORD2 0
327 327
328/* 4.1 */
329#ifdef CONFIG_NFSD_PNFS
330#define PNFSD_SUPPORTED_ATTRS_WORD1 FATTR4_WORD1_FS_LAYOUT_TYPES
331#define PNFSD_SUPPORTED_ATTRS_WORD2 \
332(FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_LAYOUT_TYPES)
333#else
334#define PNFSD_SUPPORTED_ATTRS_WORD1 0
335#define PNFSD_SUPPORTED_ATTRS_WORD2 0
336#endif /* CONFIG_NFSD_PNFS */
337
328#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ 338#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
329 NFSD4_SUPPORTED_ATTRS_WORD0 339 NFSD4_SUPPORTED_ATTRS_WORD0
330 340
331#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ 341#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
332 NFSD4_SUPPORTED_ATTRS_WORD1 342 (NFSD4_SUPPORTED_ATTRS_WORD1 | PNFSD_SUPPORTED_ATTRS_WORD1)
333 343
334#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ 344#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
335 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) 345 (NFSD4_SUPPORTED_ATTRS_WORD2 | PNFSD_SUPPORTED_ATTRS_WORD2 | \
346 FATTR4_WORD2_SUPPATTR_EXCLCREAT)
336 347
348/* 4.2 */
337#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 349#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
338#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL 350#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL
339#else 351#else
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 08236d70c667..84cae2079d21 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -187,6 +187,24 @@ fh_init(struct svc_fh *fhp, int maxsize)
187 return fhp; 187 return fhp;
188} 188}
189 189
190static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
191{
192 if (fh1->fh_size != fh2->fh_size)
193 return false;
194 if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0)
195 return false;
196 return true;
197}
198
199static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
200{
201 if (fh1->fh_fsid_type != fh2->fh_fsid_type)
202 return false;
203 if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type) != 0))
204 return false;
205 return true;
206}
207
190#ifdef CONFIG_NFSD_V3 208#ifdef CONFIG_NFSD_V3
191/* 209/*
192 * The wcc data stored in current_fh should be cleared 210 * The wcc data stored in current_fh should be cleared
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 314f5c8f8f1a..9277cc91c21b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -119,6 +119,7 @@ struct svc_program nfsd_program = {
119static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { 119static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
120 [0] = 1, 120 [0] = 1,
121 [1] = 1, 121 [1] = 1,
122 [2] = 1,
122}; 123};
123 124
124int nfsd_vers(int vers, enum vers_op change) 125int nfsd_vers(int vers, enum vers_op change)
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
new file mode 100644
index 000000000000..fedb4d620a81
--- /dev/null
+++ b/fs/nfsd/pnfs.h
@@ -0,0 +1,81 @@
1#ifndef _FS_NFSD_PNFS_H
2#define _FS_NFSD_PNFS_H 1
3
4#include <linux/exportfs.h>
5#include <linux/nfsd/export.h>
6
7#include "state.h"
8#include "xdr4.h"
9
10struct xdr_stream;
11
12struct nfsd4_deviceid_map {
13 struct list_head hash;
14 u64 idx;
15 int fsid_type;
16 u32 fsid[];
17};
18
19struct nfsd4_layout_ops {
20 u32 notify_types;
21
22 __be32 (*proc_getdeviceinfo)(struct super_block *sb,
23 struct nfsd4_getdeviceinfo *gdevp);
24 __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
25 struct nfsd4_getdeviceinfo *gdevp);
26
27 __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
28 struct nfsd4_layoutget *lgp);
29 __be32 (*encode_layoutget)(struct xdr_stream *,
30 struct nfsd4_layoutget *lgp);
31
32 __be32 (*proc_layoutcommit)(struct inode *inode,
33 struct nfsd4_layoutcommit *lcp);
34};
35
36extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
37extern const struct nfsd4_layout_ops bl_layout_ops;
38
39__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
40 struct nfsd4_compound_state *cstate, stateid_t *stateid,
41 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp);
42__be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp,
43 struct nfs4_layout_stateid *ls);
44__be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp,
45 struct nfsd4_compound_state *cstate,
46 struct nfsd4_layoutreturn *lrp);
47__be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp,
48 struct nfsd4_compound_state *cstate,
49 struct nfsd4_layoutreturn *lrp);
50int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
51 u32 device_generation);
52struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx);
53
54#ifdef CONFIG_NFSD_PNFS
55void nfsd4_setup_layout_type(struct svc_export *exp);
56void nfsd4_return_all_client_layouts(struct nfs4_client *);
57void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
58 struct nfs4_file *fp);
59int nfsd4_init_pnfs(void);
60void nfsd4_exit_pnfs(void);
61#else
62static inline void nfsd4_setup_layout_type(struct svc_export *exp)
63{
64}
65
66static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
67{
68}
69static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
70 struct nfs4_file *fp)
71{
72}
73static inline void nfsd4_exit_pnfs(void)
74{
75}
76static inline int nfsd4_init_pnfs(void)
77{
78 return 0;
79}
80#endif /* CONFIG_NFSD_PNFS */
81#endif /* _FS_NFSD_PNFS_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9d3be371240a..4f3bfeb11766 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -92,6 +92,7 @@ struct nfs4_stid {
92/* For a deleg stateid kept around only to process free_stateid's: */ 92/* For a deleg stateid kept around only to process free_stateid's: */
93#define NFS4_REVOKED_DELEG_STID 16 93#define NFS4_REVOKED_DELEG_STID 16
94#define NFS4_CLOSED_DELEG_STID 32 94#define NFS4_CLOSED_DELEG_STID 32
95#define NFS4_LAYOUT_STID 64
95 unsigned char sc_type; 96 unsigned char sc_type;
96 stateid_t sc_stateid; 97 stateid_t sc_stateid;
97 struct nfs4_client *sc_client; 98 struct nfs4_client *sc_client;
@@ -297,6 +298,9 @@ struct nfs4_client {
297 struct list_head cl_delegations; 298 struct list_head cl_delegations;
298 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ 299 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */
299 struct list_head cl_lru; /* tail queue */ 300 struct list_head cl_lru; /* tail queue */
301#ifdef CONFIG_NFSD_PNFS
302 struct list_head cl_lo_states; /* outstanding layout states */
303#endif
300 struct xdr_netobj cl_name; /* id generated by client */ 304 struct xdr_netobj cl_name; /* id generated by client */
301 nfs4_verifier cl_verifier; /* generated by client */ 305 nfs4_verifier cl_verifier; /* generated by client */
302 time_t cl_time; /* time of last lease renewal */ 306 time_t cl_time; /* time of last lease renewal */
@@ -493,9 +497,13 @@ struct nfs4_file {
493 atomic_t fi_access[2]; 497 atomic_t fi_access[2];
494 u32 fi_share_deny; 498 u32 fi_share_deny;
495 struct file *fi_deleg_file; 499 struct file *fi_deleg_file;
496 atomic_t fi_delegees; 500 int fi_delegees;
497 struct knfsd_fh fi_fhandle; 501 struct knfsd_fh fi_fhandle;
498 bool fi_had_conflict; 502 bool fi_had_conflict;
503#ifdef CONFIG_NFSD_PNFS
504 struct list_head fi_lo_states;
505 atomic_t fi_lo_recalls;
506#endif
499}; 507};
500 508
501/* 509/*
@@ -528,6 +536,24 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
528 return container_of(s, struct nfs4_ol_stateid, st_stid); 536 return container_of(s, struct nfs4_ol_stateid, st_stid);
529} 537}
530 538
539struct nfs4_layout_stateid {
540 struct nfs4_stid ls_stid;
541 struct list_head ls_perclnt;
542 struct list_head ls_perfile;
543 spinlock_t ls_lock;
544 struct list_head ls_layouts;
545 u32 ls_layout_type;
546 struct file *ls_file;
547 struct nfsd4_callback ls_recall;
548 stateid_t ls_recall_sid;
549 bool ls_recalled;
550};
551
552static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
553{
554 return container_of(s, struct nfs4_layout_stateid, ls_stid);
555}
556
531/* flags for preprocess_seqid_op() */ 557/* flags for preprocess_seqid_op() */
532#define RD_STATE 0x00000010 558#define RD_STATE 0x00000010
533#define WR_STATE 0x00000020 559#define WR_STATE 0x00000020
@@ -535,6 +561,7 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
535enum nfsd4_cb_op { 561enum nfsd4_cb_op {
536 NFSPROC4_CLNT_CB_NULL = 0, 562 NFSPROC4_CLNT_CB_NULL = 0,
537 NFSPROC4_CLNT_CB_RECALL, 563 NFSPROC4_CLNT_CB_RECALL,
564 NFSPROC4_CLNT_CB_LAYOUT,
538 NFSPROC4_CLNT_CB_SEQUENCE, 565 NFSPROC4_CLNT_CB_SEQUENCE,
539}; 566};
540 567
@@ -545,6 +572,12 @@ struct nfsd_net;
545extern __be32 nfs4_preprocess_stateid_op(struct net *net, 572extern __be32 nfs4_preprocess_stateid_op(struct net *net,
546 struct nfsd4_compound_state *cstate, 573 struct nfsd4_compound_state *cstate,
547 stateid_t *stateid, int flags, struct file **filp); 574 stateid_t *stateid, int flags, struct file **filp);
575__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
576 stateid_t *stateid, unsigned char typemask,
577 struct nfs4_stid **s, struct nfsd_net *nn);
578struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
579 struct kmem_cache *slab);
580void nfs4_unhash_stid(struct nfs4_stid *s);
548void nfs4_put_stid(struct nfs4_stid *s); 581void nfs4_put_stid(struct nfs4_stid *s);
549void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); 582void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
550extern void nfs4_release_reclaim(struct nfsd_net *); 583extern void nfs4_release_reclaim(struct nfsd_net *);
@@ -567,6 +600,14 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
567 struct nfsd_net *nn); 600 struct nfsd_net *nn);
568extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 601extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
569 602
603struct nfs4_file *find_file(struct knfsd_fh *fh);
604void put_nfs4_file(struct nfs4_file *fi);
605static inline void get_nfs4_file(struct nfs4_file *fi)
606{
607 atomic_inc(&fi->fi_ref);
608}
609struct file *find_any_file(struct nfs4_file *f);
610
570/* grace period management */ 611/* grace period management */
571void nfsd4_end_grace(struct nfsd_net *nn); 612void nfsd4_end_grace(struct nfsd_net *nn);
572 613
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
new file mode 100644
index 000000000000..82f89070594c
--- /dev/null
+++ b/fs/nfsd/trace.c
@@ -0,0 +1,5 @@
1
2#include "state.h"
3
4#define CREATE_TRACE_POINTS
5#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
new file mode 100644
index 000000000000..c668520c344b
--- /dev/null
+++ b/fs/nfsd/trace.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#undef TRACE_SYSTEM
5#define TRACE_SYSTEM nfsd
6
7#if !defined(_NFSD_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
8#define _NFSD_TRACE_H
9
10#include <linux/tracepoint.h>
11
12DECLARE_EVENT_CLASS(nfsd_stateid_class,
13 TP_PROTO(stateid_t *stp),
14 TP_ARGS(stp),
15 TP_STRUCT__entry(
16 __field(u32, cl_boot)
17 __field(u32, cl_id)
18 __field(u32, si_id)
19 __field(u32, si_generation)
20 ),
21 TP_fast_assign(
22 __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
23 __entry->cl_id = stp->si_opaque.so_clid.cl_id;
24 __entry->si_id = stp->si_opaque.so_id;
25 __entry->si_generation = stp->si_generation;
26 ),
27 TP_printk("client %08x:%08x stateid %08x:%08x",
28 __entry->cl_boot,
29 __entry->cl_id,
30 __entry->si_id,
31 __entry->si_generation)
32)
33
34#define DEFINE_STATEID_EVENT(name) \
35DEFINE_EVENT(nfsd_stateid_class, name, \
36 TP_PROTO(stateid_t *stp), \
37 TP_ARGS(stp))
38DEFINE_STATEID_EVENT(layoutstate_alloc);
39DEFINE_STATEID_EVENT(layoutstate_unhash);
40DEFINE_STATEID_EVENT(layoutstate_free);
41DEFINE_STATEID_EVENT(layout_get_lookup_fail);
42DEFINE_STATEID_EVENT(layout_commit_lookup_fail);
43DEFINE_STATEID_EVENT(layout_return_lookup_fail);
44DEFINE_STATEID_EVENT(layout_recall);
45DEFINE_STATEID_EVENT(layout_recall_done);
46DEFINE_STATEID_EVENT(layout_recall_fail);
47DEFINE_STATEID_EVENT(layout_recall_release);
48
49#endif /* _NFSD_TRACE_H */
50
51#undef TRACE_INCLUDE_PATH
52#define TRACE_INCLUDE_PATH .
53#define TRACE_INCLUDE_FILE trace
54#include <trace/define_trace.h>
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 90a5925bd6ab..0bda93e58e1b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete {
428 u32 rca_one_fs; 428 u32 rca_one_fs;
429}; 429};
430 430
431struct nfsd4_deviceid {
432 u64 fsid_idx;
433 u32 generation;
434 u32 pad;
435};
436
437struct nfsd4_layout_seg {
438 u32 iomode;
439 u64 offset;
440 u64 length;
441};
442
443struct nfsd4_getdeviceinfo {
444 struct nfsd4_deviceid gd_devid; /* request */
445 u32 gd_layout_type; /* request */
446 u32 gd_maxcount; /* request */
447 u32 gd_notify_types;/* request - response */
448 void *gd_device; /* response */
449};
450
451struct nfsd4_layoutget {
452 u64 lg_minlength; /* request */
453 u32 lg_signal; /* request */
454 u32 lg_layout_type; /* request */
455 u32 lg_maxcount; /* request */
456 stateid_t lg_sid; /* request/response */
457 struct nfsd4_layout_seg lg_seg; /* request/response */
458 void *lg_content; /* response */
459};
460
461struct nfsd4_layoutcommit {
462 stateid_t lc_sid; /* request */
463 struct nfsd4_layout_seg lc_seg; /* request */
464 u32 lc_reclaim; /* request */
465 u32 lc_newoffset; /* request */
466 u64 lc_last_wr; /* request */
467 struct timespec lc_mtime; /* request */
468 u32 lc_layout_type; /* request */
469 u32 lc_up_len; /* layout length */
470 void *lc_up_layout; /* decoded by callback */
471 u32 lc_size_chg; /* boolean for response */
472 u64 lc_newsize; /* response */
473};
474
475struct nfsd4_layoutreturn {
476 u32 lr_return_type; /* request */
477 u32 lr_layout_type; /* request */
478 struct nfsd4_layout_seg lr_seg; /* request */
479 u32 lr_reclaim; /* request */
480 u32 lrf_body_len; /* request */
481 void *lrf_body; /* request */
482 stateid_t lr_sid; /* request/response */
483 u32 lrs_present; /* response */
484};
485
431struct nfsd4_fallocate { 486struct nfsd4_fallocate {
432 /* request */ 487 /* request */
433 stateid_t falloc_stateid; 488 stateid_t falloc_stateid;
@@ -491,6 +546,10 @@ struct nfsd4_op {
491 struct nfsd4_reclaim_complete reclaim_complete; 546 struct nfsd4_reclaim_complete reclaim_complete;
492 struct nfsd4_test_stateid test_stateid; 547 struct nfsd4_test_stateid test_stateid;
493 struct nfsd4_free_stateid free_stateid; 548 struct nfsd4_free_stateid free_stateid;
549 struct nfsd4_getdeviceinfo getdeviceinfo;
550 struct nfsd4_layoutget layoutget;
551 struct nfsd4_layoutcommit layoutcommit;
552 struct nfsd4_layoutreturn layoutreturn;
494 553
495 /* NFSv4.2 */ 554 /* NFSv4.2 */
496 struct nfsd4_fallocate allocate; 555 struct nfsd4_fallocate allocate;
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index c5c55dfb91a9..c47f6fdb111a 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -21,3 +21,10 @@
21#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ 21#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
22 cb_sequence_dec_sz + \ 22 cb_sequence_dec_sz + \
23 op_dec_sz) 23 op_dec_sz)
24#define NFS4_enc_cb_layout_sz (cb_compound_enc_hdr_sz + \
25 cb_sequence_enc_sz + \
26 1 + 3 + \
27 enc_nfs4_fh_sz + 4)
28#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \
29 cb_sequence_dec_sz + \
30 op_dec_sz)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 3a03e0aea1fb..a8c728acb7a8 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -128,7 +128,6 @@ static const struct vm_operations_struct nilfs_file_vm_ops = {
128 .fault = filemap_fault, 128 .fault = filemap_fault,
129 .map_pages = filemap_map_pages, 129 .map_pages = filemap_map_pages,
130 .page_mkwrite = nilfs_page_mkwrite, 130 .page_mkwrite = nilfs_page_mkwrite,
131 .remap_pages = generic_file_remap_pages,
132}; 131};
133 132
134static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) 133static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 57ceaf33d177..748ca238915a 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -172,7 +172,6 @@ int nilfs_init_gcinode(struct inode *inode)
172 inode->i_mode = S_IFREG; 172 inode->i_mode = S_IFREG;
173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
174 inode->i_mapping->a_ops = &empty_aops; 174 inode->i_mapping->a_ops = &empty_aops;
175 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
176 175
177 ii->i_flags = 0; 176 ii->i_flags = 0;
178 nilfs_bmap_init_gc(ii->i_bmap); 177 nilfs_bmap_init_gc(ii->i_bmap);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index c4dcd1db57ee..892cf5ffdb8e 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -429,7 +429,6 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
429 429
430 inode->i_mode = S_IFREG; 430 inode->i_mode = S_IFREG;
431 mapping_set_gfp_mask(inode->i_mapping, gfp_mask); 431 mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
432 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
433 432
434 inode->i_op = &def_mdt_iops; 433 inode->i_op = &def_mdt_iops;
435 inode->i_fop = &def_mdt_fops; 434 inode->i_fop = &def_mdt_fops;
@@ -457,13 +456,12 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
457 struct nilfs_shadow_map *shadow) 456 struct nilfs_shadow_map *shadow)
458{ 457{
459 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 458 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
460 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
461 459
462 INIT_LIST_HEAD(&shadow->frozen_buffers); 460 INIT_LIST_HEAD(&shadow->frozen_buffers);
463 address_space_init_once(&shadow->frozen_data); 461 address_space_init_once(&shadow->frozen_data);
464 nilfs_mapping_init(&shadow->frozen_data, inode, bdi); 462 nilfs_mapping_init(&shadow->frozen_data, inode);
465 address_space_init_once(&shadow->frozen_btnodes); 463 address_space_init_once(&shadow->frozen_btnodes);
466 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi); 464 nilfs_mapping_init(&shadow->frozen_btnodes, inode);
467 mi->mi_shadow = shadow; 465 mi->mi_shadow = shadow;
468 return 0; 466 return 0;
469} 467}
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 91093cd74f0d..385704027575 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -141,7 +141,6 @@ enum {
141 * @ti_save: Backup of journal_info field of task_struct 141 * @ti_save: Backup of journal_info field of task_struct
142 * @ti_flags: Flags 142 * @ti_flags: Flags
143 * @ti_count: Nest level 143 * @ti_count: Nest level
144 * @ti_garbage: List of inode to be put when releasing semaphore
145 */ 144 */
146struct nilfs_transaction_info { 145struct nilfs_transaction_info {
147 u32 ti_magic; 146 u32 ti_magic;
@@ -150,7 +149,6 @@ struct nilfs_transaction_info {
150 one of other filesystems has a bug. */ 149 one of other filesystems has a bug. */
151 unsigned short ti_flags; 150 unsigned short ti_flags;
152 unsigned short ti_count; 151 unsigned short ti_count;
153 struct list_head ti_garbage;
154}; 152};
155 153
156/* ti_magic */ 154/* ti_magic */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index da276640f776..700ecbcca55d 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -461,14 +461,12 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
461 return nc; 461 return nc;
462} 462}
463 463
464void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, 464void nilfs_mapping_init(struct address_space *mapping, struct inode *inode)
465 struct backing_dev_info *bdi)
466{ 465{
467 mapping->host = inode; 466 mapping->host = inode;
468 mapping->flags = 0; 467 mapping->flags = 0;
469 mapping_set_gfp_mask(mapping, GFP_NOFS); 468 mapping_set_gfp_mask(mapping, GFP_NOFS);
470 mapping->private_data = NULL; 469 mapping->private_data = NULL;
471 mapping->backing_dev_info = bdi;
472 mapping->a_ops = &empty_aops; 470 mapping->a_ops = &empty_aops;
473} 471}
474 472
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index ef30c5c2426f..a43b8287d012 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -57,8 +57,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
57void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
58void nilfs_clear_dirty_page(struct page *, bool); 58void nilfs_clear_dirty_page(struct page *, bool);
59void nilfs_clear_dirty_pages(struct address_space *, bool); 59void nilfs_clear_dirty_pages(struct address_space *, bool);
60void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, 60void nilfs_mapping_init(struct address_space *mapping, struct inode *inode);
61 struct backing_dev_info *bdi);
62unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
63unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
64 sector_t start_blk, 63 sector_t start_blk,
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 7ef18fc656c2..469086b9f99b 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -305,7 +305,6 @@ static void nilfs_transaction_lock(struct super_block *sb,
305 ti->ti_count = 0; 305 ti->ti_count = 0;
306 ti->ti_save = cur_ti; 306 ti->ti_save = cur_ti;
307 ti->ti_magic = NILFS_TI_MAGIC; 307 ti->ti_magic = NILFS_TI_MAGIC;
308 INIT_LIST_HEAD(&ti->ti_garbage);
309 current->journal_info = ti; 308 current->journal_info = ti;
310 309
311 for (;;) { 310 for (;;) {
@@ -332,8 +331,6 @@ static void nilfs_transaction_unlock(struct super_block *sb)
332 331
333 up_write(&nilfs->ns_segctor_sem); 332 up_write(&nilfs->ns_segctor_sem);
334 current->journal_info = ti->ti_save; 333 current->journal_info = ti->ti_save;
335 if (!list_empty(&ti->ti_garbage))
336 nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
337} 334}
338 335
339static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 336static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -746,6 +743,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs,
746 } 743 }
747} 744}
748 745
746static void nilfs_iput_work_func(struct work_struct *work)
747{
748 struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
749 sc_iput_work);
750 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
751
752 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
753}
754
749static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, 755static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
750 struct nilfs_root *root) 756 struct nilfs_root *root)
751{ 757{
@@ -1900,8 +1906,8 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1900static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, 1906static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1901 struct the_nilfs *nilfs) 1907 struct the_nilfs *nilfs)
1902{ 1908{
1903 struct nilfs_transaction_info *ti = current->journal_info;
1904 struct nilfs_inode_info *ii, *n; 1909 struct nilfs_inode_info *ii, *n;
1910 int defer_iput = false;
1905 1911
1906 spin_lock(&nilfs->ns_inode_lock); 1912 spin_lock(&nilfs->ns_inode_lock);
1907 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 1913 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
@@ -1912,9 +1918,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1912 clear_bit(NILFS_I_BUSY, &ii->i_state); 1918 clear_bit(NILFS_I_BUSY, &ii->i_state);
1913 brelse(ii->i_bh); 1919 brelse(ii->i_bh);
1914 ii->i_bh = NULL; 1920 ii->i_bh = NULL;
1915 list_move_tail(&ii->i_dirty, &ti->ti_garbage); 1921 list_del_init(&ii->i_dirty);
1922 if (!ii->vfs_inode.i_nlink) {
1923 /*
1924 * Defer calling iput() to avoid a deadlock
1925 * over I_SYNC flag for inodes with i_nlink == 0
1926 */
1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928 defer_iput = true;
1929 } else {
1930 spin_unlock(&nilfs->ns_inode_lock);
1931 iput(&ii->vfs_inode);
1932 spin_lock(&nilfs->ns_inode_lock);
1933 }
1916 } 1934 }
1917 spin_unlock(&nilfs->ns_inode_lock); 1935 spin_unlock(&nilfs->ns_inode_lock);
1936
1937 if (defer_iput)
1938 schedule_work(&sci->sc_iput_work);
1918} 1939}
1919 1940
1920/* 1941/*
@@ -2583,6 +2604,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2583 INIT_LIST_HEAD(&sci->sc_segbufs); 2604 INIT_LIST_HEAD(&sci->sc_segbufs);
2584 INIT_LIST_HEAD(&sci->sc_write_logs); 2605 INIT_LIST_HEAD(&sci->sc_write_logs);
2585 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2606 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2607 INIT_LIST_HEAD(&sci->sc_iput_queue);
2608 INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
2586 init_timer(&sci->sc_timer); 2609 init_timer(&sci->sc_timer);
2587 2610
2588 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2611 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2609,6 +2632,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2609 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2632 ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2610 nilfs_transaction_unlock(sci->sc_super); 2633 nilfs_transaction_unlock(sci->sc_super);
2611 2634
2635 flush_work(&sci->sc_iput_work);
2636
2612 } while (ret && retrycount-- > 0); 2637 } while (ret && retrycount-- > 0);
2613} 2638}
2614 2639
@@ -2633,6 +2658,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2633 || sci->sc_seq_request != sci->sc_seq_done); 2658 || sci->sc_seq_request != sci->sc_seq_done);
2634 spin_unlock(&sci->sc_state_lock); 2659 spin_unlock(&sci->sc_state_lock);
2635 2660
2661 if (flush_work(&sci->sc_iput_work))
2662 flag = true;
2663
2636 if (flag || !nilfs_segctor_confirm(sci)) 2664 if (flag || !nilfs_segctor_confirm(sci))
2637 nilfs_segctor_write_out(sci); 2665 nilfs_segctor_write_out(sci);
2638 2666
@@ -2642,6 +2670,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2642 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); 2670 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2643 } 2671 }
2644 2672
2673 if (!list_empty(&sci->sc_iput_queue)) {
2674 nilfs_warning(sci->sc_super, __func__,
2675 "iput queue is not empty\n");
2676 nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
2677 }
2678
2645 WARN_ON(!list_empty(&sci->sc_segbufs)); 2679 WARN_ON(!list_empty(&sci->sc_segbufs));
2646 WARN_ON(!list_empty(&sci->sc_write_logs)); 2680 WARN_ON(!list_empty(&sci->sc_write_logs));
2647 2681
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 38a1d0013314..a48d6de1e02c 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -26,6 +26,7 @@
26#include <linux/types.h> 26#include <linux/types.h>
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/workqueue.h>
29#include <linux/nilfs2_fs.h> 30#include <linux/nilfs2_fs.h>
30#include "nilfs.h" 31#include "nilfs.h"
31 32
@@ -92,6 +93,8 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 93 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 94 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 95 * @sc_gc_inodes: List of GC inodes having blocks to be written
96 * @sc_iput_queue: list of inodes for which iput should be done
97 * @sc_iput_work: work struct to defer iput call
95 * @sc_freesegs: array of segment numbers to be freed 98 * @sc_freesegs: array of segment numbers to be freed
96 * @sc_nfreesegs: number of segments on @sc_freesegs 99 * @sc_nfreesegs: number of segments on @sc_freesegs
97 * @sc_dsync_inode: inode whose data pages are written for a sync operation 100 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -135,6 +138,8 @@ struct nilfs_sc_info {
135 138
136 struct list_head sc_dirty_files; 139 struct list_head sc_dirty_files;
137 struct list_head sc_gc_inodes; 140 struct list_head sc_gc_inodes;
141 struct list_head sc_iput_queue;
142 struct work_struct sc_iput_work;
138 143
139 __u64 *sc_freesegs; 144 __u64 *sc_freesegs;
140 size_t sc_nfreesegs; 145 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 2e5b3ec85b8f..5bc2a1cf73c3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -166,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
166 ii->i_state = 0; 166 ii->i_state = 0;
167 ii->i_cno = 0; 167 ii->i_cno = 0;
168 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode);
170 return &ii->vfs_inode; 170 return &ii->vfs_inode;
171} 171}
172 172
@@ -1057,7 +1057,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1057{ 1057{
1058 struct the_nilfs *nilfs; 1058 struct the_nilfs *nilfs;
1059 struct nilfs_root *fsroot; 1059 struct nilfs_root *fsroot;
1060 struct backing_dev_info *bdi;
1061 __u64 cno; 1060 __u64 cno;
1062 int err; 1061 int err;
1063 1062
@@ -1077,8 +1076,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1077 sb->s_time_gran = 1; 1076 sb->s_time_gran = 1;
1078 sb->s_max_links = NILFS_LINK_MAX; 1077 sb->s_max_links = NILFS_LINK_MAX;
1079 1078
1080 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 1079 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
1081 sb->s_bdi = bdi ? : &default_backing_dev_info;
1082 1080
1083 err = load_nilfs(nilfs, sb); 1081 err = load_nilfs(nilfs, sb);
1084 if (err) 1082 if (err)
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 22c629eedd82..2a24249b30af 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,5 +1,6 @@
1config FSNOTIFY 1config FSNOTIFY
2 def_bool n 2 def_bool n
3 select SRCU
3 4
4source "fs/notify/dnotify/Kconfig" 5source "fs/notify/dnotify/Kconfig"
5source "fs/notify/inotify/Kconfig" 6source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 30d3addfad75..51ceb8107284 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
140 } 140 }
141 141
142 if (S_ISDIR(path->dentry->d_inode->i_mode) && 142 if (S_ISDIR(path->dentry->d_inode->i_mode) &&
143 (marks_ignored_mask & FS_ISDIR)) 143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
144 return false; 144 return false;
145 145
146 if (event_mask & marks_mask & ~marks_ignored_mask) 146 if (event_mask & marks_mask & ~marks_ignored_mask)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bff8567aa42d..cf275500a665 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -487,20 +487,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
487 unsigned int flags, 487 unsigned int flags,
488 int *destroy) 488 int *destroy)
489{ 489{
490 __u32 oldmask; 490 __u32 oldmask = 0;
491 491
492 spin_lock(&fsn_mark->lock); 492 spin_lock(&fsn_mark->lock);
493 if (!(flags & FAN_MARK_IGNORED_MASK)) { 493 if (!(flags & FAN_MARK_IGNORED_MASK)) {
494 __u32 tmask = fsn_mark->mask & ~mask;
495
496 if (flags & FAN_MARK_ONDIR)
497 tmask &= ~FAN_ONDIR;
498
494 oldmask = fsn_mark->mask; 499 oldmask = fsn_mark->mask;
495 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); 500 fsnotify_set_mark_mask_locked(fsn_mark, tmask);
496 } else { 501 } else {
497 oldmask = fsn_mark->ignored_mask; 502 __u32 tmask = fsn_mark->ignored_mask & ~mask;
498 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); 503 if (flags & FAN_MARK_ONDIR)
504 tmask &= ~FAN_ONDIR;
505
506 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
499 } 507 }
508 *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
500 spin_unlock(&fsn_mark->lock); 509 spin_unlock(&fsn_mark->lock);
501 510
502 *destroy = !(oldmask & ~mask);
503
504 return mask & oldmask; 511 return mask & oldmask;
505} 512}
506 513
@@ -569,20 +576,22 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
569 576
570 spin_lock(&fsn_mark->lock); 577 spin_lock(&fsn_mark->lock);
571 if (!(flags & FAN_MARK_IGNORED_MASK)) { 578 if (!(flags & FAN_MARK_IGNORED_MASK)) {
579 __u32 tmask = fsn_mark->mask | mask;
580
581 if (flags & FAN_MARK_ONDIR)
582 tmask |= FAN_ONDIR;
583
572 oldmask = fsn_mark->mask; 584 oldmask = fsn_mark->mask;
573 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); 585 fsnotify_set_mark_mask_locked(fsn_mark, tmask);
574 } else { 586 } else {
575 __u32 tmask = fsn_mark->ignored_mask | mask; 587 __u32 tmask = fsn_mark->ignored_mask | mask;
588 if (flags & FAN_MARK_ONDIR)
589 tmask |= FAN_ONDIR;
590
576 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); 591 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
577 if (flags & FAN_MARK_IGNORED_SURV_MODIFY) 592 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
578 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; 593 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
579 } 594 }
580
581 if (!(flags & FAN_MARK_ONDIR)) {
582 __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
583 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
584 }
585
586 spin_unlock(&fsn_mark->lock); 595 spin_unlock(&fsn_mark->lock);
587 596
588 return mask & ~oldmask; 597 return mask & ~oldmask;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 643faa44f22b..1da9b2d184dc 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -19,6 +19,7 @@
19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21
22#include <linux/backing-dev.h>
22#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
23#include <linux/gfp.h> 24#include <linux/gfp.h>
24#include <linux/pagemap.h> 25#include <linux/pagemap.h>
@@ -2091,7 +2092,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2091 count = iov_length(iov, nr_segs); 2092 count = iov_length(iov, nr_segs);
2092 pos = *ppos; 2093 pos = *ppos;
2093 /* We can write back this queue in page reclaim. */ 2094 /* We can write back this queue in page reclaim. */
2094 current->backing_dev_info = mapping->backing_dev_info; 2095 current->backing_dev_info = inode_to_bdi(inode);
2095 written = 0; 2096 written = 0;
2096 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 2097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2097 if (err) 2098 if (err)
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 7e8282dcea2a..c58a1bcfda0f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -245,16 +245,14 @@ int ocfs2_set_acl(handle_t *handle,
245 ret = posix_acl_equiv_mode(acl, &mode); 245 ret = posix_acl_equiv_mode(acl, &mode);
246 if (ret < 0) 246 if (ret < 0)
247 return ret; 247 return ret;
248 else {
249 if (ret == 0)
250 acl = NULL;
251 248
252 ret = ocfs2_acl_set_mode(inode, di_bh, 249 if (ret == 0)
253 handle, mode); 250 acl = NULL;
254 if (ret)
255 return ret;
256 251
257 } 252 ret = ocfs2_acl_set_mode(inode, di_bh,
253 handle, mode);
254 if (ret)
255 return ret;
258 } 256 }
259 break; 257 break;
260 case ACL_TYPE_DEFAULT: 258 case ACL_TYPE_DEFAULT:
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index fcae9ef1a328..044158bd22be 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6873,7 +6873,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6873 if (IS_ERR(handle)) { 6873 if (IS_ERR(handle)) {
6874 ret = PTR_ERR(handle); 6874 ret = PTR_ERR(handle);
6875 mlog_errno(ret); 6875 mlog_errno(ret);
6876 goto out_unlock; 6876 goto out;
6877 } 6877 }
6878 6878
6879 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 6879 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
@@ -6931,7 +6931,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6931 if (ret) { 6931 if (ret) {
6932 mlog_errno(ret); 6932 mlog_errno(ret);
6933 need_free = 1; 6933 need_free = 1;
6934 goto out_commit; 6934 goto out_unlock;
6935 } 6935 }
6936 6936
6937 page_end = PAGE_CACHE_SIZE; 6937 page_end = PAGE_CACHE_SIZE;
@@ -6964,12 +6964,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6964 if (ret) { 6964 if (ret) {
6965 mlog_errno(ret); 6965 mlog_errno(ret);
6966 need_free = 1; 6966 need_free = 1;
6967 goto out_commit; 6967 goto out_unlock;
6968 } 6968 }
6969 6969
6970 inode->i_blocks = ocfs2_inode_sector_count(inode); 6970 inode->i_blocks = ocfs2_inode_sector_count(inode);
6971 } 6971 }
6972 6972
6973out_unlock:
6974 if (pages)
6975 ocfs2_unlock_and_free_pages(pages, num_pages);
6976
6973out_commit: 6977out_commit:
6974 if (ret < 0 && did_quota) 6978 if (ret < 0 && did_quota)
6975 dquot_free_space_nodirty(inode, 6979 dquot_free_space_nodirty(inode,
@@ -6989,15 +6993,11 @@ out_commit:
6989 6993
6990 ocfs2_commit_trans(osb, handle); 6994 ocfs2_commit_trans(osb, handle);
6991 6995
6992out_unlock: 6996out:
6993 if (data_ac) 6997 if (data_ac)
6994 ocfs2_free_alloc_context(data_ac); 6998 ocfs2_free_alloc_context(data_ac);
6995 6999 if (pages)
6996out:
6997 if (pages) {
6998 ocfs2_unlock_and_free_pages(pages, num_pages);
6999 kfree(pages); 7000 kfree(pages);
7000 }
7001 7001
7002 return ret; 7002 return ret;
7003} 7003}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2e355e0f8335..56c403a563bc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1016,7 +1016,8 @@ void o2net_fill_node_map(unsigned long *map, unsigned bytes)
1016 1016
1017 memset(map, 0, bytes); 1017 memset(map, 0, bytes);
1018 for (node = 0; node < O2NM_MAX_NODES; ++node) { 1018 for (node = 0; node < O2NM_MAX_NODES; ++node) {
1019 o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); 1019 if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret))
1020 continue;
1020 if (!ret) { 1021 if (!ret) {
1021 set_bit(node, map); 1022 set_bit(node, map);
1022 sc_put(sc); 1023 sc_put(sc);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index dc024367110a..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -107,12 +107,12 @@ struct o2net_node {
107 struct list_head nn_status_list; 107 struct list_head nn_status_list;
108 108
109 /* connects are attempted from when heartbeat comes up until either hb 109 /* connects are attempted from when heartbeat comes up until either hb
110 * goes down, the node is unconfigured, no connect attempts succeed 110 * goes down, the node is unconfigured, or a connect succeeds.
111 * before O2NET_CONN_IDLE_DELAY, or a connect succeeds. connect_work 111 * connect_work is queued from set_nn_state both from hb up and from
112 * is queued from set_nn_state both from hb up and from itself if a 112 * itself if a connect attempt fails and so can be self-arming.
113 * connect attempt fails and so can be self-arming. shutdown is 113 * shutdown is careful to first mark the nn such that no connects will
114 * careful to first mark the nn such that no connects will be attempted 114 * be attempted before canceling delayed connect work and flushing the
115 * before canceling delayed connect work and flushing the queue. */ 115 * queue. */
116 struct delayed_work nn_connect_work; 116 struct delayed_work nn_connect_work;
117 unsigned long nn_last_connect_attempt; 117 unsigned long nn_last_connect_attempt;
118 118
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 319e786175af..b08050bd3f2e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3456,10 +3456,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
3456 int blocksize = dir->i_sb->s_blocksize; 3456 int blocksize = dir->i_sb->s_blocksize;
3457 3457
3458 status = ocfs2_read_dir_block(dir, 0, &bh, 0); 3458 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
3459 if (status) { 3459 if (status)
3460 mlog_errno(status);
3461 goto bail; 3460 goto bail;
3462 }
3463 3461
3464 rec_len = OCFS2_DIR_REC_LEN(namelen); 3462 rec_len = OCFS2_DIR_REC_LEN(namelen);
3465 offset = 0; 3463 offset = 0;
@@ -3480,10 +3478,9 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
3480 status = ocfs2_read_dir_block(dir, 3478 status = ocfs2_read_dir_block(dir,
3481 offset >> sb->s_blocksize_bits, 3479 offset >> sb->s_blocksize_bits,
3482 &bh, 0); 3480 &bh, 0);
3483 if (status) { 3481 if (status)
3484 mlog_errno(status);
3485 goto bail; 3482 goto bail;
3486 } 3483
3487 /* move to next block */ 3484 /* move to next block */
3488 de = (struct ocfs2_dir_entry *) bh->b_data; 3485 de = (struct ocfs2_dir_entry *) bh->b_data;
3489 } 3486 }
@@ -3513,7 +3510,6 @@ next:
3513 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 3510 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
3514 } 3511 }
3515 3512
3516 status = 0;
3517bail: 3513bail:
3518 brelse(bh); 3514 brelse(bh);
3519 if (status) 3515 if (status)
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index b46278f9ae44..fd6bbbbd7d78 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
385 head = &res->granted; 385 head = &res->granted;
386 386
387 list_for_each_entry(lock, head, list) { 387 list_for_each_entry(lock, head, list) {
388 if (lock->ml.cookie == cookie) 388 /* if lock is found but unlock is pending ignore the bast */
389 if (lock->ml.cookie == cookie) {
390 if (lock->unlock_pending)
391 break;
389 goto do_ast; 392 goto do_ast;
393 }
390 } 394 }
391 395
392 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " 396 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 149eb556b8c6..825136070d2c 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -406,7 +406,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
406 } 406 }
407 spin_unlock(&dlm->spinlock); 407 spin_unlock(&dlm->spinlock);
408 408
409 out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); 409 out += snprintf(buf + out, len - out, "Total on list: %lu\n", total);
410 410
411 return out; 411 return out;
412} 412}
@@ -464,7 +464,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
464 spin_unlock(&dlm->master_lock); 464 spin_unlock(&dlm->master_lock);
465 465
466 out += snprintf(buf + out, len - out, 466 out += snprintf(buf + out, len - out,
467 "Total: %ld, Longest: %ld\n", total, longest); 467 "Total: %lu, Longest: %lu\n", total, longest);
468 return out; 468 return out;
469} 469}
470 470
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 50a59d2337b2..7df88a6dd626 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -674,20 +674,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
674 spin_unlock(&dlm->spinlock); 674 spin_unlock(&dlm->spinlock);
675} 675}
676 676
677int dlm_joined(struct dlm_ctxt *dlm)
678{
679 int ret = 0;
680
681 spin_lock(&dlm_domain_lock);
682
683 if (dlm->dlm_state == DLM_CTXT_JOINED)
684 ret = 1;
685
686 spin_unlock(&dlm_domain_lock);
687
688 return ret;
689}
690
691int dlm_shutting_down(struct dlm_ctxt *dlm) 677int dlm_shutting_down(struct dlm_ctxt *dlm)
692{ 678{
693 int ret = 0; 679 int ret = 0;
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
index 2f7f60bfeb3b..fd6122a38dbd 100644
--- a/fs/ocfs2/dlm/dlmdomain.h
+++ b/fs/ocfs2/dlm/dlmdomain.h
@@ -28,7 +28,6 @@
28extern spinlock_t dlm_domain_lock; 28extern spinlock_t dlm_domain_lock;
29extern struct list_head dlm_domains; 29extern struct list_head dlm_domains;
30 30
31int dlm_joined(struct dlm_ctxt *dlm);
32int dlm_shutting_down(struct dlm_ctxt *dlm); 31int dlm_shutting_down(struct dlm_ctxt *dlm);
33void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 32void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
34 int node_num); 33 int node_num);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index cecd875653e4..ce12e0b1a31f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1070,6 +1070,9 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
1070 dead_node, dlm->name); 1070 dead_node, dlm->name);
1071 list_del_init(&lock->list); 1071 list_del_init(&lock->list);
1072 dlm_lock_put(lock); 1072 dlm_lock_put(lock);
1073 /* Can't schedule DLM_UNLOCK_FREE_LOCK
1074 * - do manually */
1075 dlm_lock_put(lock);
1073 break; 1076 break;
1074 } 1077 }
1075 } 1078 }
@@ -2346,6 +2349,10 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2346 dead_node, dlm->name); 2349 dead_node, dlm->name);
2347 list_del_init(&lock->list); 2350 list_del_init(&lock->list);
2348 dlm_lock_put(lock); 2351 dlm_lock_put(lock);
2352 /* Can't schedule
2353 * DLM_UNLOCK_FREE_LOCK
2354 * - do manually */
2355 dlm_lock_put(lock);
2349 break; 2356 break;
2350 } 2357 }
2351 } 2358 }
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 57c40e34f56f..061ba6a91bf2 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -390,12 +390,6 @@ clear_fields:
390 ip->ip_conn = NULL; 390 ip->ip_conn = NULL;
391} 391}
392 392
393static struct backing_dev_info dlmfs_backing_dev_info = {
394 .name = "ocfs2-dlmfs",
395 .ra_pages = 0, /* No readahead */
396 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
397};
398
399static struct inode *dlmfs_get_root_inode(struct super_block *sb) 393static struct inode *dlmfs_get_root_inode(struct super_block *sb)
400{ 394{
401 struct inode *inode = new_inode(sb); 395 struct inode *inode = new_inode(sb);
@@ -404,7 +398,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
404 if (inode) { 398 if (inode) {
405 inode->i_ino = get_next_ino(); 399 inode->i_ino = get_next_ino();
406 inode_init_owner(inode, NULL, mode); 400 inode_init_owner(inode, NULL, mode);
407 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
408 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 401 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
409 inc_nlink(inode); 402 inc_nlink(inode);
410 403
@@ -428,7 +421,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
428 421
429 inode->i_ino = get_next_ino(); 422 inode->i_ino = get_next_ino();
430 inode_init_owner(inode, parent, mode); 423 inode_init_owner(inode, parent, mode);
431 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
432 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 424 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
433 425
434 ip = DLMFS_I(inode); 426 ip = DLMFS_I(inode);
@@ -643,10 +635,6 @@ static int __init init_dlmfs_fs(void)
643 int status; 635 int status;
644 int cleanup_inode = 0, cleanup_worker = 0; 636 int cleanup_inode = 0, cleanup_worker = 0;
645 637
646 status = bdi_init(&dlmfs_backing_dev_info);
647 if (status)
648 return status;
649
650 dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", 638 dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
651 sizeof(struct dlmfs_inode_private), 639 sizeof(struct dlmfs_inode_private),
652 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 640 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -673,7 +661,6 @@ bail:
673 kmem_cache_destroy(dlmfs_inode_cache); 661 kmem_cache_destroy(dlmfs_inode_cache);
674 if (cleanup_worker) 662 if (cleanup_worker)
675 destroy_workqueue(user_dlm_worker); 663 destroy_workqueue(user_dlm_worker);
676 bdi_destroy(&dlmfs_backing_dev_info);
677 } else 664 } else
678 printk("OCFS2 User DLM kernel interface loaded\n"); 665 printk("OCFS2 User DLM kernel interface loaded\n");
679 return status; 666 return status;
@@ -693,7 +680,6 @@ static void __exit exit_dlmfs_fs(void)
693 rcu_barrier(); 680 rcu_barrier();
694 kmem_cache_destroy(dlmfs_inode_cache); 681 kmem_cache_destroy(dlmfs_inode_cache);
695 682
696 bdi_destroy(&dlmfs_backing_dev_info);
697} 683}
698 684
699MODULE_AUTHOR("Oracle"); 685MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c423af04c69..11849a44dc5a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3750,6 +3750,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3750 break; 3750 break;
3751 spin_unlock(&dentry_attach_lock); 3751 spin_unlock(&dentry_attach_lock);
3752 3752
3753 if (S_ISDIR(dl->dl_inode->i_mode))
3754 shrink_dcache_parent(dentry);
3755
3753 mlog(0, "d_delete(%pd);\n", dentry); 3756 mlog(0, "d_delete(%pd);\n", dentry);
3754 3757
3755 /* 3758 /*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3950693dd0f6..e0f04d55fd05 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
569 handle_t *handle = NULL; 569 handle_t *handle = NULL;
570 struct ocfs2_alloc_context *data_ac = NULL; 570 struct ocfs2_alloc_context *data_ac = NULL;
571 struct ocfs2_alloc_context *meta_ac = NULL; 571 struct ocfs2_alloc_context *meta_ac = NULL;
572 enum ocfs2_alloc_restarted why; 572 enum ocfs2_alloc_restarted why = RESTART_NONE;
573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
574 struct ocfs2_extent_tree et; 574 struct ocfs2_extent_tree et;
575 int did_quota = 0; 575 int did_quota = 0;
@@ -2363,7 +2363,7 @@ relock:
2363 goto out_dio; 2363 goto out_dio;
2364 } 2364 }
2365 } else { 2365 } else {
2366 current->backing_dev_info = file->f_mapping->backing_dev_info; 2366 current->backing_dev_info = inode_to_bdi(inode);
2367 written = generic_perform_write(file, from, *ppos); 2367 written = generic_perform_write(file, from, *ppos);
2368 if (likely(written >= 0)) 2368 if (likely(written >= 0))
2369 iocb->ki_pos = *ppos + written; 2369 iocb->ki_pos = *ppos + written;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4f502382180f..d10860fde165 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1447,7 +1447,6 @@ bail:
1447 * requires that we call do_exit(). And it isn't exported, but 1447 * requires that we call do_exit(). And it isn't exported, but
1448 * complete_and_exit() seems to be a minimal wrapper around it. */ 1448 * complete_and_exit() seems to be a minimal wrapper around it. */
1449 complete_and_exit(NULL, status); 1449 complete_and_exit(NULL, status);
1450 return status;
1451} 1450}
1452 1451
1453void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) 1452void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 10d66c75cecb..9581d190f6e1 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -173,7 +173,6 @@ out:
173static const struct vm_operations_struct ocfs2_file_vm_ops = { 173static const struct vm_operations_struct ocfs2_file_vm_ops = {
174 .fault = ocfs2_fault, 174 .fault = ocfs2_fault,
175 .page_mkwrite = ocfs2_page_mkwrite, 175 .page_mkwrite = ocfs2_page_mkwrite,
176 .remap_pages = generic_file_remap_pages,
177}; 176};
178 177
179int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 178int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7d6b7d090452..fdbcbfed529e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -279,6 +279,8 @@ enum ocfs2_mount_options
279 writes */ 279 writes */
280 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ 280 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
281 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ 281 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
282
283 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
282}; 284};
283 285
284#define OCFS2_OSB_SOFT_RO 0x0001 286#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 1eae330193a6..b6d51333ad02 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -48,6 +48,7 @@ struct ocfs2_quota_recovery {
48/* In-memory structure with quota header information */ 48/* In-memory structure with quota header information */
49struct ocfs2_mem_dqinfo { 49struct ocfs2_mem_dqinfo {
50 unsigned int dqi_type; /* Quota type this structure describes */ 50 unsigned int dqi_type; /* Quota type this structure describes */
51 unsigned int dqi_flags; /* Flags OLQF_* */
51 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 52 unsigned int dqi_chunks; /* Number of chunks in local quota file */
52 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 53 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
53 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 54 unsigned int dqi_syncms; /* How often should we sync with other nodes */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 10b653930ee2..3d0b63d34225 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -73,12 +73,6 @@ static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
73 ol_dqblk_block_off(sb, c, off); 73 ol_dqblk_block_off(sb, c, off);
74} 74}
75 75
76/* Compute block number from given offset */
77static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
78{
79 return off >> sb->s_blocksize_bits;
80}
81
82static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) 76static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
83{ 77{
84 return off & ((1 << sb->s_blocksize_bits) - 1); 78 return off & ((1 << sb->s_blocksize_bits) - 1);
@@ -292,7 +286,7 @@ static void olq_update_info(struct buffer_head *bh, void *private)
292 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + 286 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
293 OCFS2_LOCAL_INFO_OFF); 287 OCFS2_LOCAL_INFO_OFF);
294 spin_lock(&dq_data_lock); 288 spin_lock(&dq_data_lock);
295 ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); 289 ldinfo->dqi_flags = cpu_to_le32(oinfo->dqi_flags);
296 ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); 290 ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
297 ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); 291 ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
298 spin_unlock(&dq_data_lock); 292 spin_unlock(&dq_data_lock);
@@ -701,8 +695,8 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
701 /* We don't need the lock and we have to acquire quota file locks 695 /* We don't need the lock and we have to acquire quota file locks
702 * which will later depend on this lock */ 696 * which will later depend on this lock */
703 mutex_unlock(&sb_dqopt(sb)->dqio_mutex); 697 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
704 info->dqi_maxblimit = 0x7fffffffffffffffLL; 698 info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
705 info->dqi_maxilimit = 0x7fffffffffffffffLL; 699 info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
706 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); 700 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
707 if (!oinfo) { 701 if (!oinfo) {
708 mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota" 702 mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
@@ -737,13 +731,13 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
737 } 731 }
738 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + 732 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
739 OCFS2_LOCAL_INFO_OFF); 733 OCFS2_LOCAL_INFO_OFF);
740 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); 734 oinfo->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
741 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); 735 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
742 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); 736 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
743 oinfo->dqi_libh = bh; 737 oinfo->dqi_libh = bh;
744 738
745 /* We crashed when using local quota file? */ 739 /* We crashed when using local quota file? */
746 if (!(info->dqi_flags & OLQF_CLEAN)) { 740 if (!(oinfo->dqi_flags & OLQF_CLEAN)) {
747 rec = OCFS2_SB(sb)->quota_rec; 741 rec = OCFS2_SB(sb)->quota_rec;
748 if (!rec) { 742 if (!rec) {
749 rec = ocfs2_alloc_quota_recovery(); 743 rec = ocfs2_alloc_quota_recovery();
@@ -772,7 +766,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
772 } 766 }
773 767
774 /* Now mark quota file as used */ 768 /* Now mark quota file as used */
775 info->dqi_flags &= ~OLQF_CLEAN; 769 oinfo->dqi_flags &= ~OLQF_CLEAN;
776 status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info); 770 status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
777 if (status < 0) { 771 if (status < 0) {
778 mlog_errno(status); 772 mlog_errno(status);
@@ -857,7 +851,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
857 goto out; 851 goto out;
858 852
859 /* Mark local file as clean */ 853 /* Mark local file as clean */
860 info->dqi_flags |= OLQF_CLEAN; 854 oinfo->dqi_flags |= OLQF_CLEAN;
861 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], 855 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
862 oinfo->dqi_libh, 856 oinfo->dqi_libh,
863 olq_update_info, 857 olq_update_info,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d81f6e2a97f5..ee541f92dab4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2428,8 +2428,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2428 get_bh(prev_bh); 2428 get_bh(prev_bh);
2429 } 2429 }
2430 2430
2431 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2432
2433 trace_ocfs2_calc_refcount_meta_credits_iterate( 2431 trace_ocfs2_calc_refcount_meta_credits_iterate(
2434 recs_add, (unsigned long long)cpos, clusters, 2432 recs_add, (unsigned long long)cpos, clusters,
2435 (unsigned long long)le64_to_cpu(rec.r_cpos), 2433 (unsigned long long)le64_to_cpu(rec.r_cpos),
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 41ffd36c689c..6a348b0294ab 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -39,7 +39,7 @@
39#define OCFS2_CHECK_RESERVATIONS 39#define OCFS2_CHECK_RESERVATIONS
40#endif 40#endif
41 41
42DEFINE_SPINLOCK(resv_lock); 42static DEFINE_SPINLOCK(resv_lock);
43 43
44#define OCFS2_MIN_RESV_WINDOW_BITS 8 44#define OCFS2_MIN_RESV_WINDOW_BITS 8
45#define OCFS2_MAX_RESV_WINDOW_BITS 1024 45#define OCFS2_MAX_RESV_WINDOW_BITS 1024
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 83723179e1ec..87a1f7679d9b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -191,6 +191,7 @@ enum {
191 Opt_coherency_full, 191 Opt_coherency_full,
192 Opt_resv_level, 192 Opt_resv_level,
193 Opt_dir_resv_level, 193 Opt_dir_resv_level,
194 Opt_journal_async_commit,
194 Opt_err, 195 Opt_err,
195}; 196};
196 197
@@ -222,6 +223,7 @@ static const match_table_t tokens = {
222 {Opt_coherency_full, "coherency=full"}, 223 {Opt_coherency_full, "coherency=full"},
223 {Opt_resv_level, "resv_level=%u"}, 224 {Opt_resv_level, "resv_level=%u"},
224 {Opt_dir_resv_level, "dir_resv_level=%u"}, 225 {Opt_dir_resv_level, "dir_resv_level=%u"},
226 {Opt_journal_async_commit, "journal_async_commit"},
225 {Opt_err, NULL} 227 {Opt_err, NULL}
226}; 228};
227 229
@@ -1000,36 +1002,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
1000 } 1002 }
1001} 1003}
1002 1004
1003/* Handle quota on quotactl */
1004static int ocfs2_quota_on(struct super_block *sb, int type, int format_id)
1005{
1006 unsigned int feature[OCFS2_MAXQUOTAS] = {
1007 OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
1008 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
1009
1010 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
1011 return -EINVAL;
1012
1013 return dquot_enable(sb_dqopt(sb)->files[type], type,
1014 format_id, DQUOT_LIMITS_ENABLED);
1015}
1016
1017/* Handle quota off quotactl */
1018static int ocfs2_quota_off(struct super_block *sb, int type)
1019{
1020 return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
1021}
1022
1023static const struct quotactl_ops ocfs2_quotactl_ops = {
1024 .quota_on_meta = ocfs2_quota_on,
1025 .quota_off = ocfs2_quota_off,
1026 .quota_sync = dquot_quota_sync,
1027 .get_info = dquot_get_dqinfo,
1028 .set_info = dquot_set_dqinfo,
1029 .get_dqblk = dquot_get_dqblk,
1030 .set_dqblk = dquot_set_dqblk,
1031};
1032
1033static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 1005static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1034{ 1006{
1035 struct dentry *root; 1007 struct dentry *root;
@@ -1500,6 +1472,9 @@ static int ocfs2_parse_options(struct super_block *sb,
1500 option < OCFS2_MAX_RESV_LEVEL) 1472 option < OCFS2_MAX_RESV_LEVEL)
1501 mopt->dir_resv_level = option; 1473 mopt->dir_resv_level = option;
1502 break; 1474 break;
1475 case Opt_journal_async_commit:
1476 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
1477 break;
1503 default: 1478 default:
1504 mlog(ML_ERROR, 1479 mlog(ML_ERROR,
1505 "Unrecognized mount option \"%s\" " 1480 "Unrecognized mount option \"%s\" "
@@ -1606,6 +1581,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
1606 if (osb->osb_dir_resv_level != osb->osb_resv_level) 1581 if (osb->osb_dir_resv_level != osb->osb_resv_level)
1607 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); 1582 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
1608 1583
1584 if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
1585 seq_printf(s, ",journal_async_commit");
1586
1609 return 0; 1587 return 0;
1610} 1588}
1611 1589
@@ -2079,7 +2057,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2079 sb->s_op = &ocfs2_sops; 2057 sb->s_op = &ocfs2_sops;
2080 sb->s_d_op = &ocfs2_dentry_ops; 2058 sb->s_d_op = &ocfs2_dentry_ops;
2081 sb->s_export_op = &ocfs2_export_ops; 2059 sb->s_export_op = &ocfs2_export_ops;
2082 sb->s_qcop = &ocfs2_quotactl_ops; 2060 sb->s_qcop = &dquot_quotactl_sysfile_ops;
2083 sb->dq_op = &ocfs2_quota_operations; 2061 sb->dq_op = &ocfs2_quota_operations;
2084 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; 2062 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
2085 sb->s_xattr = ocfs2_xattr_handlers; 2063 sb->s_xattr = ocfs2_xattr_handlers;
@@ -2475,6 +2453,15 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2475 goto finally; 2453 goto finally;
2476 } 2454 }
2477 2455
2456 if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
2457 jbd2_journal_set_features(osb->journal->j_journal,
2458 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2459 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2460 else
2461 jbd2_journal_clear_features(osb->journal->j_journal,
2462 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2463 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2464
2478 if (dirty) { 2465 if (dirty) {
2479 /* recover my local alloc if we didn't unmount cleanly. */ 2466 /* recover my local alloc if we didn't unmount cleanly. */
2480 status = ocfs2_begin_local_alloc_recovery(osb, 2467 status = ocfs2_begin_local_alloc_recovery(osb,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 662f8dee149f..85b190dc132f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5334,16 +5334,6 @@ out:
5334 return ret; 5334 return ret;
5335} 5335}
5336 5336
5337static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5338 struct ocfs2_xattr_bucket *bucket,
5339 int offs)
5340{
5341 int block_off = offs >> inode->i_sb->s_blocksize_bits;
5342
5343 offs = offs % inode->i_sb->s_blocksize;
5344 return bucket_block(bucket, block_off) + offs;
5345}
5346
5347/* 5337/*
5348 * Truncate the specified xe_off entry in xattr bucket. 5338 * Truncate the specified xe_off entry in xattr bucket.
5349 * bucket is indicated by header_bh and len is the new length. 5339 * bucket is indicated by header_bh and len is the new length.
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1e3187da1fed..7eee2d8b97d9 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -5,6 +5,7 @@
5#include <linux/ksm.h> 5#include <linux/ksm.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
7#include <linux/mmzone.h> 7#include <linux/mmzone.h>
8#include <linux/huge_mm.h>
8#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
9#include <linux/seq_file.h> 10#include <linux/seq_file.h>
10#include <linux/hugetlb.h> 11#include <linux/hugetlb.h>
@@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page)
121 * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon 122 * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
122 * to make sure a given page is a thp, not a non-huge compound page. 123 * to make sure a given page is a thp, not a non-huge compound page.
123 */ 124 */
124 else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || 125 else if (PageTransCompound(page)) {
125 PageAnon(compound_head(page)))) 126 struct page *head = compound_head(page);
126 u |= 1 << KPF_THP; 127
128 if (PageLRU(head) || PageAnon(head))
129 u |= 1 << KPF_THP;
130 else if (is_huge_zero_page(head)) {
131 u |= 1 << KPF_ZERO_PAGE;
132 u |= 1 << KPF_THP;
133 }
134 } else if (is_zero_pfn(page_to_pfn(page)))
135 u |= 1 << KPF_ZERO_PAGE;
136
127 137
128 /* 138 /*
129 * Caveats on high order pages: page->_count will only be set 139 * Caveats on high order pages: page->_count will only be set
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 246eae84b13b..0e36c1e49fe3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -21,7 +21,7 @@
21 21
22void task_mem(struct seq_file *m, struct mm_struct *mm) 22void task_mem(struct seq_file *m, struct mm_struct *mm)
23{ 23{
24 unsigned long data, text, lib, swap; 24 unsigned long data, text, lib, swap, ptes, pmds;
25 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; 25 unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
26 26
27 /* 27 /*
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
42 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; 42 text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
43 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; 43 lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
44 swap = get_mm_counter(mm, MM_SWAPENTS); 44 swap = get_mm_counter(mm, MM_SWAPENTS);
45 ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
46 pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
45 seq_printf(m, 47 seq_printf(m,
46 "VmPeak:\t%8lu kB\n" 48 "VmPeak:\t%8lu kB\n"
47 "VmSize:\t%8lu kB\n" 49 "VmSize:\t%8lu kB\n"
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
54 "VmExe:\t%8lu kB\n" 56 "VmExe:\t%8lu kB\n"
55 "VmLib:\t%8lu kB\n" 57 "VmLib:\t%8lu kB\n"
56 "VmPTE:\t%8lu kB\n" 58 "VmPTE:\t%8lu kB\n"
59 "VmPMD:\t%8lu kB\n"
57 "VmSwap:\t%8lu kB\n", 60 "VmSwap:\t%8lu kB\n",
58 hiwater_vm << (PAGE_SHIFT-10), 61 hiwater_vm << (PAGE_SHIFT-10),
59 total_vm << (PAGE_SHIFT-10), 62 total_vm << (PAGE_SHIFT-10),
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
63 total_rss << (PAGE_SHIFT-10), 66 total_rss << (PAGE_SHIFT-10),
64 data << (PAGE_SHIFT-10), 67 data << (PAGE_SHIFT-10),
65 mm->stack_vm << (PAGE_SHIFT-10), text, lib, 68 mm->stack_vm << (PAGE_SHIFT-10), text, lib,
66 (PTRS_PER_PTE * sizeof(pte_t) * 69 ptes >> 10,
67 atomic_long_read(&mm->nr_ptes)) >> 10, 70 pmds >> 10,
68 swap << (PAGE_SHIFT-10)); 71 swap << (PAGE_SHIFT-10));
69} 72}
70 73
@@ -433,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = {
433 436
434#ifdef CONFIG_PROC_PAGE_MONITOR 437#ifdef CONFIG_PROC_PAGE_MONITOR
435struct mem_size_stats { 438struct mem_size_stats {
436 struct vm_area_struct *vma;
437 unsigned long resident; 439 unsigned long resident;
438 unsigned long shared_clean; 440 unsigned long shared_clean;
439 unsigned long shared_dirty; 441 unsigned long shared_dirty;
@@ -443,7 +445,6 @@ struct mem_size_stats {
443 unsigned long anonymous; 445 unsigned long anonymous;
444 unsigned long anonymous_thp; 446 unsigned long anonymous_thp;
445 unsigned long swap; 447 unsigned long swap;
446 unsigned long nonlinear;
447 u64 pss; 448 u64 pss;
448}; 449};
449 450
@@ -483,8 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
483 struct mm_walk *walk) 484 struct mm_walk *walk)
484{ 485{
485 struct mem_size_stats *mss = walk->private; 486 struct mem_size_stats *mss = walk->private;
486 struct vm_area_struct *vma = mss->vma; 487 struct vm_area_struct *vma = walk->vma;
487 pgoff_t pgoff = linear_page_index(vma, addr);
488 struct page *page = NULL; 488 struct page *page = NULL;
489 489
490 if (pte_present(*pte)) { 490 if (pte_present(*pte)) {
@@ -496,17 +496,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
496 mss->swap += PAGE_SIZE; 496 mss->swap += PAGE_SIZE;
497 else if (is_migration_entry(swpent)) 497 else if (is_migration_entry(swpent))
498 page = migration_entry_to_page(swpent); 498 page = migration_entry_to_page(swpent);
499 } else if (pte_file(*pte)) {
500 if (pte_to_pgoff(*pte) != pgoff)
501 mss->nonlinear += PAGE_SIZE;
502 } 499 }
503 500
504 if (!page) 501 if (!page)
505 return; 502 return;
506
507 if (page->index != pgoff)
508 mss->nonlinear += PAGE_SIZE;
509
510 smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); 503 smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
511} 504}
512 505
@@ -515,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
515 struct mm_walk *walk) 508 struct mm_walk *walk)
516{ 509{
517 struct mem_size_stats *mss = walk->private; 510 struct mem_size_stats *mss = walk->private;
518 struct vm_area_struct *vma = mss->vma; 511 struct vm_area_struct *vma = walk->vma;
519 struct page *page; 512 struct page *page;
520 513
521 /* FOLL_DUMP will return -EFAULT on huge zero page */ 514 /* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -536,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
536static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 529static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
537 struct mm_walk *walk) 530 struct mm_walk *walk)
538{ 531{
539 struct mem_size_stats *mss = walk->private; 532 struct vm_area_struct *vma = walk->vma;
540 struct vm_area_struct *vma = mss->vma;
541 pte_t *pte; 533 pte_t *pte;
542 spinlock_t *ptl; 534 spinlock_t *ptl;
543 535
@@ -596,7 +588,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
596 [ilog2(VM_ACCOUNT)] = "ac", 588 [ilog2(VM_ACCOUNT)] = "ac",
597 [ilog2(VM_NORESERVE)] = "nr", 589 [ilog2(VM_NORESERVE)] = "nr",
598 [ilog2(VM_HUGETLB)] = "ht", 590 [ilog2(VM_HUGETLB)] = "ht",
599 [ilog2(VM_NONLINEAR)] = "nl",
600 [ilog2(VM_ARCH_1)] = "ar", 591 [ilog2(VM_ARCH_1)] = "ar",
601 [ilog2(VM_DONTDUMP)] = "dd", 592 [ilog2(VM_DONTDUMP)] = "dd",
602#ifdef CONFIG_MEM_SOFT_DIRTY 593#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -630,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
630 }; 621 };
631 622
632 memset(&mss, 0, sizeof mss); 623 memset(&mss, 0, sizeof mss);
633 mss.vma = vma;
634 /* mmap_sem is held in m_start */ 624 /* mmap_sem is held in m_start */
635 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 625 walk_page_vma(vma, &smaps_walk);
636 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
637 626
638 show_map_vma(m, vma, is_pid); 627 show_map_vma(m, vma, is_pid);
639 628
@@ -668,10 +657,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
668 (vma->vm_flags & VM_LOCKED) ? 657 (vma->vm_flags & VM_LOCKED) ?
669 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 658 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
670 659
671 if (vma->vm_flags & VM_NONLINEAR)
672 seq_printf(m, "Nonlinear: %8lu kB\n",
673 mss.nonlinear >> 10);
674
675 show_smap_vma_flags(m, vma); 660 show_smap_vma_flags(m, vma);
676 m_cache_vma(m, vma); 661 m_cache_vma(m, vma);
677 return 0; 662 return 0;
@@ -751,14 +736,13 @@ enum clear_refs_types {
751}; 736};
752 737
753struct clear_refs_private { 738struct clear_refs_private {
754 struct vm_area_struct *vma;
755 enum clear_refs_types type; 739 enum clear_refs_types type;
756}; 740};
757 741
742#ifdef CONFIG_MEM_SOFT_DIRTY
758static inline void clear_soft_dirty(struct vm_area_struct *vma, 743static inline void clear_soft_dirty(struct vm_area_struct *vma,
759 unsigned long addr, pte_t *pte) 744 unsigned long addr, pte_t *pte)
760{ 745{
761#ifdef CONFIG_MEM_SOFT_DIRTY
762 /* 746 /*
763 * The soft-dirty tracker uses #PF-s to catch writes 747 * The soft-dirty tracker uses #PF-s to catch writes
764 * to pages, so write-protect the pte as well. See the 748 * to pages, so write-protect the pte as well. See the
@@ -772,24 +756,63 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
772 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 756 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
773 } else if (is_swap_pte(ptent)) { 757 } else if (is_swap_pte(ptent)) {
774 ptent = pte_swp_clear_soft_dirty(ptent); 758 ptent = pte_swp_clear_soft_dirty(ptent);
775 } else if (pte_file(ptent)) {
776 ptent = pte_file_clear_soft_dirty(ptent);
777 } 759 }
778 760
779 set_pte_at(vma->vm_mm, addr, pte, ptent); 761 set_pte_at(vma->vm_mm, addr, pte, ptent);
780#endif
781} 762}
782 763
764static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
765 unsigned long addr, pmd_t *pmdp)
766{
767 pmd_t pmd = *pmdp;
768
769 pmd = pmd_wrprotect(pmd);
770 pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
771
772 if (vma->vm_flags & VM_SOFTDIRTY)
773 vma->vm_flags &= ~VM_SOFTDIRTY;
774
775 set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
776}
777
778#else
779
780static inline void clear_soft_dirty(struct vm_area_struct *vma,
781 unsigned long addr, pte_t *pte)
782{
783}
784
785static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
786 unsigned long addr, pmd_t *pmdp)
787{
788}
789#endif
790
783static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 791static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
784 unsigned long end, struct mm_walk *walk) 792 unsigned long end, struct mm_walk *walk)
785{ 793{
786 struct clear_refs_private *cp = walk->private; 794 struct clear_refs_private *cp = walk->private;
787 struct vm_area_struct *vma = cp->vma; 795 struct vm_area_struct *vma = walk->vma;
788 pte_t *pte, ptent; 796 pte_t *pte, ptent;
789 spinlock_t *ptl; 797 spinlock_t *ptl;
790 struct page *page; 798 struct page *page;
791 799
792 split_huge_page_pmd(vma, addr, pmd); 800 if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
801 if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
802 clear_soft_dirty_pmd(vma, addr, pmd);
803 goto out;
804 }
805
806 page = pmd_page(*pmd);
807
808 /* Clear accessed and referenced bits. */
809 pmdp_test_and_clear_young(vma, addr, pmd);
810 ClearPageReferenced(page);
811out:
812 spin_unlock(ptl);
813 return 0;
814 }
815
793 if (pmd_trans_unstable(pmd)) 816 if (pmd_trans_unstable(pmd))
794 return 0; 817 return 0;
795 818
@@ -818,6 +841,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
818 return 0; 841 return 0;
819} 842}
820 843
844static int clear_refs_test_walk(unsigned long start, unsigned long end,
845 struct mm_walk *walk)
846{
847 struct clear_refs_private *cp = walk->private;
848 struct vm_area_struct *vma = walk->vma;
849
850 if (vma->vm_flags & VM_PFNMAP)
851 return 1;
852
853 /*
854 * Writing 1 to /proc/pid/clear_refs affects all pages.
855 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
856 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
857 * Writing 4 to /proc/pid/clear_refs affects all pages.
858 */
859 if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
860 return 1;
861 if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
862 return 1;
863 return 0;
864}
865
821static ssize_t clear_refs_write(struct file *file, const char __user *buf, 866static ssize_t clear_refs_write(struct file *file, const char __user *buf,
822 size_t count, loff_t *ppos) 867 size_t count, loff_t *ppos)
823{ 868{
@@ -858,6 +903,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
858 }; 903 };
859 struct mm_walk clear_refs_walk = { 904 struct mm_walk clear_refs_walk = {
860 .pmd_entry = clear_refs_pte_range, 905 .pmd_entry = clear_refs_pte_range,
906 .test_walk = clear_refs_test_walk,
861 .mm = mm, 907 .mm = mm,
862 .private = &cp, 908 .private = &cp,
863 }; 909 };
@@ -877,28 +923,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
877 } 923 }
878 mmu_notifier_invalidate_range_start(mm, 0, -1); 924 mmu_notifier_invalidate_range_start(mm, 0, -1);
879 } 925 }
880 for (vma = mm->mmap; vma; vma = vma->vm_next) { 926 walk_page_range(0, ~0UL, &clear_refs_walk);
881 cp.vma = vma;
882 if (is_vm_hugetlb_page(vma))
883 continue;
884 /*
885 * Writing 1 to /proc/pid/clear_refs affects all pages.
886 *
887 * Writing 2 to /proc/pid/clear_refs only affects
888 * Anonymous pages.
889 *
890 * Writing 3 to /proc/pid/clear_refs only affects file
891 * mapped pages.
892 *
893 * Writing 4 to /proc/pid/clear_refs affects all pages.
894 */
895 if (type == CLEAR_REFS_ANON && vma->vm_file)
896 continue;
897 if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
898 continue;
899 walk_page_range(vma->vm_start, vma->vm_end,
900 &clear_refs_walk);
901 }
902 if (type == CLEAR_REFS_SOFT_DIRTY) 927 if (type == CLEAR_REFS_SOFT_DIRTY)
903 mmu_notifier_invalidate_range_end(mm, 0, -1); 928 mmu_notifier_invalidate_range_end(mm, 0, -1);
904 flush_tlb_mm(mm); 929 flush_tlb_mm(mm);
@@ -1066,15 +1091,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
1066static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 1091static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
1067 struct mm_walk *walk) 1092 struct mm_walk *walk)
1068{ 1093{
1069 struct vm_area_struct *vma; 1094 struct vm_area_struct *vma = walk->vma;
1070 struct pagemapread *pm = walk->private; 1095 struct pagemapread *pm = walk->private;
1071 spinlock_t *ptl; 1096 spinlock_t *ptl;
1072 pte_t *pte; 1097 pte_t *pte, *orig_pte;
1073 int err = 0; 1098 int err = 0;
1074 1099
1075 /* find the first VMA at or above 'addr' */ 1100 if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
1076 vma = find_vma(walk->mm, addr);
1077 if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
1078 int pmd_flags2; 1101 int pmd_flags2;
1079 1102
1080 if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) 1103 if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1100,51 +1123,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
1100 if (pmd_trans_unstable(pmd)) 1123 if (pmd_trans_unstable(pmd))
1101 return 0; 1124 return 0;
1102 1125
1103 while (1) { 1126 /*
1104 /* End of address space hole, which we mark as non-present. */ 1127 * We can assume that @vma always points to a valid one and @end never
1105 unsigned long hole_end; 1128 * goes beyond vma->vm_end.
1106 1129 */
1107 if (vma) 1130 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
1108 hole_end = min(end, vma->vm_start); 1131 for (; addr < end; pte++, addr += PAGE_SIZE) {
1109 else 1132 pagemap_entry_t pme;
1110 hole_end = end;
1111
1112 for (; addr < hole_end; addr += PAGE_SIZE) {
1113 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
1114
1115 err = add_to_pagemap(addr, &pme, pm);
1116 if (err)
1117 return err;
1118 }
1119
1120 if (!vma || vma->vm_start >= end)
1121 break;
1122 /*
1123 * We can't possibly be in a hugetlb VMA. In general,
1124 * for a mm_walk with a pmd_entry and a hugetlb_entry,
1125 * the pmd_entry can only be called on addresses in a
1126 * hugetlb if the walk starts in a non-hugetlb VMA and
1127 * spans a hugepage VMA. Since pagemap_read walks are
1128 * PMD-sized and PMD-aligned, this will never be true.
1129 */
1130 BUG_ON(is_vm_hugetlb_page(vma));
1131
1132 /* Addresses in the VMA. */
1133 for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
1134 pagemap_entry_t pme;
1135 pte = pte_offset_map(pmd, addr);
1136 pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
1137 pte_unmap(pte);
1138 err = add_to_pagemap(addr, &pme, pm);
1139 if (err)
1140 return err;
1141 }
1142 1133
1143 if (addr == end) 1134 pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
1135 err = add_to_pagemap(addr, &pme, pm);
1136 if (err)
1144 break; 1137 break;
1145
1146 vma = find_vma(walk->mm, addr);
1147 } 1138 }
1139 pte_unmap_unlock(orig_pte, ptl);
1148 1140
1149 cond_resched(); 1141 cond_resched();
1150 1142
@@ -1170,15 +1162,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
1170 struct mm_walk *walk) 1162 struct mm_walk *walk)
1171{ 1163{
1172 struct pagemapread *pm = walk->private; 1164 struct pagemapread *pm = walk->private;
1173 struct vm_area_struct *vma; 1165 struct vm_area_struct *vma = walk->vma;
1174 int err = 0; 1166 int err = 0;
1175 int flags2; 1167 int flags2;
1176 pagemap_entry_t pme; 1168 pagemap_entry_t pme;
1177 1169
1178 vma = find_vma(walk->mm, addr); 1170 if (vma->vm_flags & VM_SOFTDIRTY)
1179 WARN_ON_ONCE(!vma);
1180
1181 if (vma && (vma->vm_flags & VM_SOFTDIRTY))
1182 flags2 = __PM_SOFT_DIRTY; 1171 flags2 = __PM_SOFT_DIRTY;
1183 else 1172 else
1184 flags2 = 0; 1173 flags2 = 0;
@@ -1338,7 +1327,6 @@ const struct file_operations proc_pagemap_operations = {
1338#ifdef CONFIG_NUMA 1327#ifdef CONFIG_NUMA
1339 1328
1340struct numa_maps { 1329struct numa_maps {
1341 struct vm_area_struct *vma;
1342 unsigned long pages; 1330 unsigned long pages;
1343 unsigned long anon; 1331 unsigned long anon;
1344 unsigned long active; 1332 unsigned long active;
@@ -1407,18 +1395,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
1407static int gather_pte_stats(pmd_t *pmd, unsigned long addr, 1395static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1408 unsigned long end, struct mm_walk *walk) 1396 unsigned long end, struct mm_walk *walk)
1409{ 1397{
1410 struct numa_maps *md; 1398 struct numa_maps *md = walk->private;
1399 struct vm_area_struct *vma = walk->vma;
1411 spinlock_t *ptl; 1400 spinlock_t *ptl;
1412 pte_t *orig_pte; 1401 pte_t *orig_pte;
1413 pte_t *pte; 1402 pte_t *pte;
1414 1403
1415 md = walk->private; 1404 if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
1416
1417 if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
1418 pte_t huge_pte = *(pte_t *)pmd; 1405 pte_t huge_pte = *(pte_t *)pmd;
1419 struct page *page; 1406 struct page *page;
1420 1407
1421 page = can_gather_numa_stats(huge_pte, md->vma, addr); 1408 page = can_gather_numa_stats(huge_pte, vma, addr);
1422 if (page) 1409 if (page)
1423 gather_stats(page, md, pte_dirty(huge_pte), 1410 gather_stats(page, md, pte_dirty(huge_pte),
1424 HPAGE_PMD_SIZE/PAGE_SIZE); 1411 HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1430,7 +1417,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1430 return 0; 1417 return 0;
1431 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); 1418 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
1432 do { 1419 do {
1433 struct page *page = can_gather_numa_stats(*pte, md->vma, addr); 1420 struct page *page = can_gather_numa_stats(*pte, vma, addr);
1434 if (!page) 1421 if (!page)
1435 continue; 1422 continue;
1436 gather_stats(page, md, pte_dirty(*pte), 1); 1423 gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1440,7 +1427,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1440 return 0; 1427 return 0;
1441} 1428}
1442#ifdef CONFIG_HUGETLB_PAGE 1429#ifdef CONFIG_HUGETLB_PAGE
1443static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 1430static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1444 unsigned long addr, unsigned long end, struct mm_walk *walk) 1431 unsigned long addr, unsigned long end, struct mm_walk *walk)
1445{ 1432{
1446 struct numa_maps *md; 1433 struct numa_maps *md;
@@ -1459,7 +1446,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
1459} 1446}
1460 1447
1461#else 1448#else
1462static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, 1449static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1463 unsigned long addr, unsigned long end, struct mm_walk *walk) 1450 unsigned long addr, unsigned long end, struct mm_walk *walk)
1464{ 1451{
1465 return 0; 1452 return 0;
@@ -1477,7 +1464,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1477 struct numa_maps *md = &numa_priv->md; 1464 struct numa_maps *md = &numa_priv->md;
1478 struct file *file = vma->vm_file; 1465 struct file *file = vma->vm_file;
1479 struct mm_struct *mm = vma->vm_mm; 1466 struct mm_struct *mm = vma->vm_mm;
1480 struct mm_walk walk = {}; 1467 struct mm_walk walk = {
1468 .hugetlb_entry = gather_hugetlb_stats,
1469 .pmd_entry = gather_pte_stats,
1470 .private = md,
1471 .mm = mm,
1472 };
1481 struct mempolicy *pol; 1473 struct mempolicy *pol;
1482 char buffer[64]; 1474 char buffer[64];
1483 int nid; 1475 int nid;
@@ -1488,13 +1480,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1488 /* Ensure we start with an empty set of numa_maps statistics. */ 1480 /* Ensure we start with an empty set of numa_maps statistics. */
1489 memset(md, 0, sizeof(*md)); 1481 memset(md, 0, sizeof(*md));
1490 1482
1491 md->vma = vma;
1492
1493 walk.hugetlb_entry = gather_hugetbl_stats;
1494 walk.pmd_entry = gather_pte_stats;
1495 walk.private = md;
1496 walk.mm = mm;
1497
1498 pol = __get_vma_policy(vma, vma->vm_start); 1483 pol = __get_vma_policy(vma, vma->vm_start);
1499 if (pol) { 1484 if (pol) {
1500 mpol_to_str(buffer, sizeof(buffer), pol); 1485 mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1528,7 +1513,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1528 if (is_vm_hugetlb_page(vma)) 1513 if (is_vm_hugetlb_page(vma))
1529 seq_puts(m, " huge"); 1514 seq_puts(m, " huge");
1530 1515
1531 walk_page_range(vma->vm_start, vma->vm_end, &walk); 1516 /* mmap_sem is held by m_start */
1517 walk_page_vma(vma, &walk);
1532 1518
1533 if (!md->pages) 1519 if (!md->pages)
1534 goto out; 1520 goto out;
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 983d9510becc..916b8e23d968 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -21,6 +21,16 @@ config PSTORE_CONSOLE
21 When the option is enabled, pstore will log all kernel 21 When the option is enabled, pstore will log all kernel
22 messages, even if no oops or panic happened. 22 messages, even if no oops or panic happened.
23 23
24config PSTORE_PMSG
25 bool "Log user space messages"
26 depends on PSTORE
27 help
28 When the option is enabled, pstore will export a character
29 interface /dev/pmsg0 to log user space messages. On reboot
30 data can be retrieved from /sys/fs/pstore/pmsg-ramoops-[ID].
31
32 If unsure, say N.
33
24config PSTORE_FTRACE 34config PSTORE_FTRACE
25 bool "Persistent function tracer" 35 bool "Persistent function tracer"
26 depends on PSTORE 36 depends on PSTORE
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 4c9095c2781e..e647d8e81712 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -7,5 +7,7 @@ obj-y += pstore.o
7pstore-objs += inode.o platform.o 7pstore-objs += inode.o platform.o
8obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o 8obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o
9 9
10obj-$(CONFIG_PSTORE_PMSG) += pmsg.o
11
10ramoops-objs += ram.o ram_core.o 12ramoops-objs += ram.o ram_core.o
11obj-$(CONFIG_PSTORE_RAM) += ramoops.o 13obj-$(CONFIG_PSTORE_RAM) += ramoops.o
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 50416602774d..b32ce53d24ee 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -338,32 +338,38 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
338 338
339 switch (type) { 339 switch (type) {
340 case PSTORE_TYPE_DMESG: 340 case PSTORE_TYPE_DMESG:
341 sprintf(name, "dmesg-%s-%lld%s", psname, id, 341 scnprintf(name, sizeof(name), "dmesg-%s-%lld%s",
342 compressed ? ".enc.z" : ""); 342 psname, id, compressed ? ".enc.z" : "");
343 break; 343 break;
344 case PSTORE_TYPE_CONSOLE: 344 case PSTORE_TYPE_CONSOLE:
345 sprintf(name, "console-%s-%lld", psname, id); 345 scnprintf(name, sizeof(name), "console-%s-%lld", psname, id);
346 break; 346 break;
347 case PSTORE_TYPE_FTRACE: 347 case PSTORE_TYPE_FTRACE:
348 sprintf(name, "ftrace-%s-%lld", psname, id); 348 scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id);
349 break; 349 break;
350 case PSTORE_TYPE_MCE: 350 case PSTORE_TYPE_MCE:
351 sprintf(name, "mce-%s-%lld", psname, id); 351 scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id);
352 break; 352 break;
353 case PSTORE_TYPE_PPC_RTAS: 353 case PSTORE_TYPE_PPC_RTAS:
354 sprintf(name, "rtas-%s-%lld", psname, id); 354 scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id);
355 break; 355 break;
356 case PSTORE_TYPE_PPC_OF: 356 case PSTORE_TYPE_PPC_OF:
357 sprintf(name, "powerpc-ofw-%s-%lld", psname, id); 357 scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld",
358 psname, id);
358 break; 359 break;
359 case PSTORE_TYPE_PPC_COMMON: 360 case PSTORE_TYPE_PPC_COMMON:
360 sprintf(name, "powerpc-common-%s-%lld", psname, id); 361 scnprintf(name, sizeof(name), "powerpc-common-%s-%lld",
362 psname, id);
363 break;
364 case PSTORE_TYPE_PMSG:
365 scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
361 break; 366 break;
362 case PSTORE_TYPE_UNKNOWN: 367 case PSTORE_TYPE_UNKNOWN:
363 sprintf(name, "unknown-%s-%lld", psname, id); 368 scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
364 break; 369 break;
365 default: 370 default:
366 sprintf(name, "type%d-%s-%lld", type, psname, id); 371 scnprintf(name, sizeof(name), "type%d-%s-%lld",
372 type, psname, id);
367 break; 373 break;
368 } 374 }
369 375
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 3b3d305277c4..c36ba2cd0b5d 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -45,6 +45,12 @@ extern void pstore_register_ftrace(void);
45static inline void pstore_register_ftrace(void) {} 45static inline void pstore_register_ftrace(void) {}
46#endif 46#endif
47 47
48#ifdef CONFIG_PSTORE_PMSG
49extern void pstore_register_pmsg(void);
50#else
51static inline void pstore_register_pmsg(void) {}
52#endif
53
48extern struct pstore_info *psinfo; 54extern struct pstore_info *psinfo;
49 55
50extern void pstore_set_kmsg_bytes(int); 56extern void pstore_set_kmsg_bytes(int);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 0a9b72cdfeca..c4c9a10c5760 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -301,7 +301,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
301 301
302 if (big_oops_buf) { 302 if (big_oops_buf) {
303 dst = big_oops_buf; 303 dst = big_oops_buf;
304 hsize = sprintf(dst, "%s#%d Part%d\n", why, 304 hsize = sprintf(dst, "%s#%d Part%u\n", why,
305 oopscount, part); 305 oopscount, part);
306 size = big_oops_buf_sz - hsize; 306 size = big_oops_buf_sz - hsize;
307 307
@@ -321,7 +321,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
321 } 321 }
322 } else { 322 } else {
323 dst = psinfo->buf; 323 dst = psinfo->buf;
324 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, 324 hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount,
325 part); 325 part);
326 size = psinfo->bufsize - hsize; 326 size = psinfo->bufsize - hsize;
327 dst += hsize; 327 dst += hsize;
@@ -447,6 +447,7 @@ int pstore_register(struct pstore_info *psi)
447 if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { 447 if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
448 pstore_register_console(); 448 pstore_register_console();
449 pstore_register_ftrace(); 449 pstore_register_ftrace();
450 pstore_register_pmsg();
450 } 451 }
451 452
452 if (pstore_update_ms >= 0) { 453 if (pstore_update_ms >= 0) {
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
new file mode 100644
index 000000000000..feb5dd2948b4
--- /dev/null
+++ b/fs/pstore/pmsg.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright 2014 Google, Inc.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14#include <linux/cdev.h>
15#include <linux/device.h>
16#include <linux/fs.h>
17#include <linux/uaccess.h>
18#include <linux/vmalloc.h>
19#include "internal.h"
20
21static DEFINE_MUTEX(pmsg_lock);
22#define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE)
23
24static ssize_t write_pmsg(struct file *file, const char __user *buf,
25 size_t count, loff_t *ppos)
26{
27 size_t i, buffer_size;
28 char *buffer;
29
30 if (!count)
31 return 0;
32
33 if (!access_ok(VERIFY_READ, buf, count))
34 return -EFAULT;
35
36 buffer_size = count;
37 if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE)
38 buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE;
39 buffer = vmalloc(buffer_size);
40
41 mutex_lock(&pmsg_lock);
42 for (i = 0; i < count; ) {
43 size_t c = min(count - i, buffer_size);
44 u64 id;
45 long ret;
46
47 ret = __copy_from_user(buffer, buf + i, c);
48 if (unlikely(ret != 0)) {
49 mutex_unlock(&pmsg_lock);
50 vfree(buffer);
51 return -EFAULT;
52 }
53 psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, 0, c,
54 psinfo);
55
56 i += c;
57 }
58
59 mutex_unlock(&pmsg_lock);
60 vfree(buffer);
61 return count;
62}
63
64static const struct file_operations pmsg_fops = {
65 .owner = THIS_MODULE,
66 .llseek = noop_llseek,
67 .write = write_pmsg,
68};
69
70static struct class *pmsg_class;
71static int pmsg_major;
72#define PMSG_NAME "pmsg"
73#undef pr_fmt
74#define pr_fmt(fmt) PMSG_NAME ": " fmt
75
76static char *pmsg_devnode(struct device *dev, umode_t *mode)
77{
78 if (mode)
79 *mode = 0220;
80 return NULL;
81}
82
83void pstore_register_pmsg(void)
84{
85 struct device *pmsg_device;
86
87 pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops);
88 if (pmsg_major < 0) {
89 pr_err("register_chrdev failed\n");
90 goto err;
91 }
92
93 pmsg_class = class_create(THIS_MODULE, PMSG_NAME);
94 if (IS_ERR(pmsg_class)) {
95 pr_err("device class file already in use\n");
96 goto err_class;
97 }
98 pmsg_class->devnode = pmsg_devnode;
99
100 pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0),
101 NULL, "%s%d", PMSG_NAME, 0);
102 if (IS_ERR(pmsg_device)) {
103 pr_err("failed to create device\n");
104 goto err_device;
105 }
106 return;
107
108err_device:
109 class_destroy(pmsg_class);
110err_class:
111 unregister_chrdev(pmsg_major, PMSG_NAME);
112err:
113 return;
114}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8613e5b35c22..39d1373128e9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -51,6 +51,10 @@ static ulong ramoops_ftrace_size = MIN_MEM_SIZE;
51module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); 51module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400);
52MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); 52MODULE_PARM_DESC(ftrace_size, "size of ftrace log");
53 53
54static ulong ramoops_pmsg_size = MIN_MEM_SIZE;
55module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
56MODULE_PARM_DESC(pmsg_size, "size of user space message log");
57
54static ulong mem_address; 58static ulong mem_address;
55module_param(mem_address, ulong, 0400); 59module_param(mem_address, ulong, 0400);
56MODULE_PARM_DESC(mem_address, 60MODULE_PARM_DESC(mem_address,
@@ -82,12 +86,14 @@ struct ramoops_context {
82 struct persistent_ram_zone **przs; 86 struct persistent_ram_zone **przs;
83 struct persistent_ram_zone *cprz; 87 struct persistent_ram_zone *cprz;
84 struct persistent_ram_zone *fprz; 88 struct persistent_ram_zone *fprz;
89 struct persistent_ram_zone *mprz;
85 phys_addr_t phys_addr; 90 phys_addr_t phys_addr;
86 unsigned long size; 91 unsigned long size;
87 unsigned int memtype; 92 unsigned int memtype;
88 size_t record_size; 93 size_t record_size;
89 size_t console_size; 94 size_t console_size;
90 size_t ftrace_size; 95 size_t ftrace_size;
96 size_t pmsg_size;
91 int dump_oops; 97 int dump_oops;
92 struct persistent_ram_ecc_info ecc_info; 98 struct persistent_ram_ecc_info ecc_info;
93 unsigned int max_dump_cnt; 99 unsigned int max_dump_cnt;
@@ -96,6 +102,7 @@ struct ramoops_context {
96 unsigned int dump_read_cnt; 102 unsigned int dump_read_cnt;
97 unsigned int console_read_cnt; 103 unsigned int console_read_cnt;
98 unsigned int ftrace_read_cnt; 104 unsigned int ftrace_read_cnt;
105 unsigned int pmsg_read_cnt;
99 struct pstore_info pstore; 106 struct pstore_info pstore;
100}; 107};
101 108
@@ -109,6 +116,7 @@ static int ramoops_pstore_open(struct pstore_info *psi)
109 cxt->dump_read_cnt = 0; 116 cxt->dump_read_cnt = 0;
110 cxt->console_read_cnt = 0; 117 cxt->console_read_cnt = 0;
111 cxt->ftrace_read_cnt = 0; 118 cxt->ftrace_read_cnt = 0;
119 cxt->pmsg_read_cnt = 0;
112 return 0; 120 return 0;
113} 121}
114 122
@@ -164,6 +172,12 @@ static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time,
164 return header_length; 172 return header_length;
165} 173}
166 174
175static bool prz_ok(struct persistent_ram_zone *prz)
176{
177 return !!prz && !!(persistent_ram_old_size(prz) +
178 persistent_ram_ecc_string(prz, NULL, 0));
179}
180
167static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, 181static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
168 int *count, struct timespec *time, 182 int *count, struct timespec *time,
169 char **buf, bool *compressed, 183 char **buf, bool *compressed,
@@ -178,13 +192,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
178 prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, 192 prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt,
179 cxt->max_dump_cnt, id, type, 193 cxt->max_dump_cnt, id, type,
180 PSTORE_TYPE_DMESG, 1); 194 PSTORE_TYPE_DMESG, 1);
181 if (!prz) 195 if (!prz_ok(prz))
182 prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, 196 prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
183 1, id, type, PSTORE_TYPE_CONSOLE, 0); 197 1, id, type, PSTORE_TYPE_CONSOLE, 0);
184 if (!prz) 198 if (!prz_ok(prz))
185 prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, 199 prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
186 1, id, type, PSTORE_TYPE_FTRACE, 0); 200 1, id, type, PSTORE_TYPE_FTRACE, 0);
187 if (!prz) 201 if (!prz_ok(prz))
202 prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
203 1, id, type, PSTORE_TYPE_PMSG, 0);
204 if (!prz_ok(prz))
188 return 0; 205 return 0;
189 206
190 if (!persistent_ram_old(prz)) 207 if (!persistent_ram_old(prz))
@@ -252,6 +269,11 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
252 return -ENOMEM; 269 return -ENOMEM;
253 persistent_ram_write(cxt->fprz, buf, size); 270 persistent_ram_write(cxt->fprz, buf, size);
254 return 0; 271 return 0;
272 } else if (type == PSTORE_TYPE_PMSG) {
273 if (!cxt->mprz)
274 return -ENOMEM;
275 persistent_ram_write(cxt->mprz, buf, size);
276 return 0;
255 } 277 }
256 278
257 if (type != PSTORE_TYPE_DMESG) 279 if (type != PSTORE_TYPE_DMESG)
@@ -309,6 +331,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
309 case PSTORE_TYPE_FTRACE: 331 case PSTORE_TYPE_FTRACE:
310 prz = cxt->fprz; 332 prz = cxt->fprz;
311 break; 333 break;
334 case PSTORE_TYPE_PMSG:
335 prz = cxt->mprz;
336 break;
312 default: 337 default:
313 return -EINVAL; 338 return -EINVAL;
314 } 339 }
@@ -435,7 +460,7 @@ static int ramoops_probe(struct platform_device *pdev)
435 goto fail_out; 460 goto fail_out;
436 461
437 if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && 462 if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size &&
438 !pdata->ftrace_size)) { 463 !pdata->ftrace_size && !pdata->pmsg_size)) {
439 pr_err("The memory size and the record/console size must be " 464 pr_err("The memory size and the record/console size must be "
440 "non-zero\n"); 465 "non-zero\n");
441 goto fail_out; 466 goto fail_out;
@@ -447,6 +472,8 @@ static int ramoops_probe(struct platform_device *pdev)
447 pdata->console_size = rounddown_pow_of_two(pdata->console_size); 472 pdata->console_size = rounddown_pow_of_two(pdata->console_size);
448 if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) 473 if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size))
449 pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); 474 pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size);
475 if (pdata->pmsg_size && !is_power_of_2(pdata->pmsg_size))
476 pdata->pmsg_size = rounddown_pow_of_two(pdata->pmsg_size);
450 477
451 cxt->size = pdata->mem_size; 478 cxt->size = pdata->mem_size;
452 cxt->phys_addr = pdata->mem_address; 479 cxt->phys_addr = pdata->mem_address;
@@ -454,12 +481,14 @@ static int ramoops_probe(struct platform_device *pdev)
454 cxt->record_size = pdata->record_size; 481 cxt->record_size = pdata->record_size;
455 cxt->console_size = pdata->console_size; 482 cxt->console_size = pdata->console_size;
456 cxt->ftrace_size = pdata->ftrace_size; 483 cxt->ftrace_size = pdata->ftrace_size;
484 cxt->pmsg_size = pdata->pmsg_size;
457 cxt->dump_oops = pdata->dump_oops; 485 cxt->dump_oops = pdata->dump_oops;
458 cxt->ecc_info = pdata->ecc_info; 486 cxt->ecc_info = pdata->ecc_info;
459 487
460 paddr = cxt->phys_addr; 488 paddr = cxt->phys_addr;
461 489
462 dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size; 490 dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
491 - cxt->pmsg_size;
463 err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); 492 err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz);
464 if (err) 493 if (err)
465 goto fail_out; 494 goto fail_out;
@@ -474,13 +503,9 @@ static int ramoops_probe(struct platform_device *pdev)
474 if (err) 503 if (err)
475 goto fail_init_fprz; 504 goto fail_init_fprz;
476 505
477 if (!cxt->przs && !cxt->cprz && !cxt->fprz) { 506 err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0);
478 pr_err("memory size too small, minimum is %zu\n", 507 if (err)
479 cxt->console_size + cxt->record_size + 508 goto fail_init_mprz;
480 cxt->ftrace_size);
481 err = -EINVAL;
482 goto fail_cnt;
483 }
484 509
485 cxt->pstore.data = cxt; 510 cxt->pstore.data = cxt;
486 /* 511 /*
@@ -525,7 +550,8 @@ fail_buf:
525 kfree(cxt->pstore.buf); 550 kfree(cxt->pstore.buf);
526fail_clear: 551fail_clear:
527 cxt->pstore.bufsize = 0; 552 cxt->pstore.bufsize = 0;
528fail_cnt: 553 kfree(cxt->mprz);
554fail_init_mprz:
529 kfree(cxt->fprz); 555 kfree(cxt->fprz);
530fail_init_fprz: 556fail_init_fprz:
531 kfree(cxt->cprz); 557 kfree(cxt->cprz);
@@ -583,6 +609,7 @@ static void ramoops_register_dummy(void)
583 dummy_data->record_size = record_size; 609 dummy_data->record_size = record_size;
584 dummy_data->console_size = ramoops_console_size; 610 dummy_data->console_size = ramoops_console_size;
585 dummy_data->ftrace_size = ramoops_ftrace_size; 611 dummy_data->ftrace_size = ramoops_ftrace_size;
612 dummy_data->pmsg_size = ramoops_pmsg_size;
586 dummy_data->dump_oops = dump_oops; 613 dummy_data->dump_oops = dump_oops;
587 /* 614 /*
588 * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC 615 * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index c51df1dd237e..4a09975aac90 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -5,6 +5,7 @@
5config QUOTA 5config QUOTA
6 bool "Quota support" 6 bool "Quota support"
7 select QUOTACTL 7 select QUOTACTL
8 select SRCU
8 help 9 help
9 If you say Y here, you will be able to set per user limits for disk 10 If you say Y here, you will be able to set per user limits for disk
10 usage (also called disk quotas). Currently, it works for the 11 usage (also called disk quotas). Currently, it works for the
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8f0acef3d184..0ccd4ba3a246 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1248,7 +1248,7 @@ static int ignore_hardlimit(struct dquot *dquot)
1248 1248
1249 return capable(CAP_SYS_RESOURCE) && 1249 return capable(CAP_SYS_RESOURCE) &&
1250 (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || 1250 (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
1251 !(info->dqi_flags & V1_DQF_RSQUASH)); 1251 !(info->dqi_flags & DQF_ROOT_SQUASH));
1252} 1252}
1253 1253
1254/* needs dq_data_lock */ 1254/* needs dq_data_lock */
@@ -2385,41 +2385,106 @@ out:
2385} 2385}
2386EXPORT_SYMBOL(dquot_quota_on_mount); 2386EXPORT_SYMBOL(dquot_quota_on_mount);
2387 2387
2388static inline qsize_t qbtos(qsize_t blocks) 2388static int dquot_quota_enable(struct super_block *sb, unsigned int flags)
2389{ 2389{
2390 return blocks << QIF_DQBLKSIZE_BITS; 2390 int ret;
2391 int type;
2392 struct quota_info *dqopt = sb_dqopt(sb);
2393
2394 if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE))
2395 return -ENOSYS;
2396 /* Accounting cannot be turned on while fs is mounted */
2397 flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT);
2398 if (!flags)
2399 return -EINVAL;
2400 for (type = 0; type < MAXQUOTAS; type++) {
2401 if (!(flags & qtype_enforce_flag(type)))
2402 continue;
2403 /* Can't enforce without accounting */
2404 if (!sb_has_quota_usage_enabled(sb, type))
2405 return -EINVAL;
2406 ret = dquot_enable(dqopt->files[type], type,
2407 dqopt->info[type].dqi_fmt_id,
2408 DQUOT_LIMITS_ENABLED);
2409 if (ret < 0)
2410 goto out_err;
2411 }
2412 return 0;
2413out_err:
2414 /* Backout enforcement enablement we already did */
2415 for (type--; type >= 0; type--) {
2416 if (flags & qtype_enforce_flag(type))
2417 dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
2418 }
2419 /* Error code translation for better compatibility with XFS */
2420 if (ret == -EBUSY)
2421 ret = -EEXIST;
2422 return ret;
2391} 2423}
2392 2424
2393static inline qsize_t stoqb(qsize_t space) 2425static int dquot_quota_disable(struct super_block *sb, unsigned int flags)
2394{ 2426{
2395 return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; 2427 int ret;
2428 int type;
2429 struct quota_info *dqopt = sb_dqopt(sb);
2430
2431 if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE))
2432 return -ENOSYS;
2433 /*
2434 * We don't support turning off accounting via quotactl. In principle
2435 * quota infrastructure can do this but filesystems don't expect
2436 * userspace to be able to do it.
2437 */
2438 if (flags &
2439 (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT))
2440 return -EOPNOTSUPP;
2441
2442 /* Filter out limits not enabled */
2443 for (type = 0; type < MAXQUOTAS; type++)
2444 if (!sb_has_quota_limits_enabled(sb, type))
2445 flags &= ~qtype_enforce_flag(type);
2446 /* Nothing left? */
2447 if (!flags)
2448 return -EEXIST;
2449 for (type = 0; type < MAXQUOTAS; type++) {
2450 if (flags & qtype_enforce_flag(type)) {
2451 ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
2452 if (ret < 0)
2453 goto out_err;
2454 }
2455 }
2456 return 0;
2457out_err:
2458 /* Backout enforcement disabling we already did */
2459 for (type--; type >= 0; type--) {
2460 if (flags & qtype_enforce_flag(type))
2461 dquot_enable(dqopt->files[type], type,
2462 dqopt->info[type].dqi_fmt_id,
2463 DQUOT_LIMITS_ENABLED);
2464 }
2465 return ret;
2396} 2466}
2397 2467
2398/* Generic routine for getting common part of quota structure */ 2468/* Generic routine for getting common part of quota structure */
2399static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) 2469static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di)
2400{ 2470{
2401 struct mem_dqblk *dm = &dquot->dq_dqb; 2471 struct mem_dqblk *dm = &dquot->dq_dqb;
2402 2472
2403 memset(di, 0, sizeof(*di)); 2473 memset(di, 0, sizeof(*di));
2404 di->d_version = FS_DQUOT_VERSION;
2405 di->d_flags = dquot->dq_id.type == USRQUOTA ?
2406 FS_USER_QUOTA : FS_GROUP_QUOTA;
2407 di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id);
2408
2409 spin_lock(&dq_data_lock); 2474 spin_lock(&dq_data_lock);
2410 di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); 2475 di->d_spc_hardlimit = dm->dqb_bhardlimit;
2411 di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); 2476 di->d_spc_softlimit = dm->dqb_bsoftlimit;
2412 di->d_ino_hardlimit = dm->dqb_ihardlimit; 2477 di->d_ino_hardlimit = dm->dqb_ihardlimit;
2413 di->d_ino_softlimit = dm->dqb_isoftlimit; 2478 di->d_ino_softlimit = dm->dqb_isoftlimit;
2414 di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; 2479 di->d_space = dm->dqb_curspace + dm->dqb_rsvspace;
2415 di->d_icount = dm->dqb_curinodes; 2480 di->d_ino_count = dm->dqb_curinodes;
2416 di->d_btimer = dm->dqb_btime; 2481 di->d_spc_timer = dm->dqb_btime;
2417 di->d_itimer = dm->dqb_itime; 2482 di->d_ino_timer = dm->dqb_itime;
2418 spin_unlock(&dq_data_lock); 2483 spin_unlock(&dq_data_lock);
2419} 2484}
2420 2485
2421int dquot_get_dqblk(struct super_block *sb, struct kqid qid, 2486int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
2422 struct fs_disk_quota *di) 2487 struct qc_dqblk *di)
2423{ 2488{
2424 struct dquot *dquot; 2489 struct dquot *dquot;
2425 2490
@@ -2433,70 +2498,70 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
2433} 2498}
2434EXPORT_SYMBOL(dquot_get_dqblk); 2499EXPORT_SYMBOL(dquot_get_dqblk);
2435 2500
2436#define VFS_FS_DQ_MASK \ 2501#define VFS_QC_MASK \
2437 (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ 2502 (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \
2438 FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ 2503 QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \
2439 FS_DQ_BTIMER | FS_DQ_ITIMER) 2504 QC_SPC_TIMER | QC_INO_TIMER)
2440 2505
2441/* Generic routine for setting common part of quota structure */ 2506/* Generic routine for setting common part of quota structure */
2442static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) 2507static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
2443{ 2508{
2444 struct mem_dqblk *dm = &dquot->dq_dqb; 2509 struct mem_dqblk *dm = &dquot->dq_dqb;
2445 int check_blim = 0, check_ilim = 0; 2510 int check_blim = 0, check_ilim = 0;
2446 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; 2511 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
2447 2512
2448 if (di->d_fieldmask & ~VFS_FS_DQ_MASK) 2513 if (di->d_fieldmask & ~VFS_QC_MASK)
2449 return -EINVAL; 2514 return -EINVAL;
2450 2515
2451 if (((di->d_fieldmask & FS_DQ_BSOFT) && 2516 if (((di->d_fieldmask & QC_SPC_SOFT) &&
2452 (di->d_blk_softlimit > dqi->dqi_maxblimit)) || 2517 di->d_spc_softlimit > dqi->dqi_max_spc_limit) ||
2453 ((di->d_fieldmask & FS_DQ_BHARD) && 2518 ((di->d_fieldmask & QC_SPC_HARD) &&
2454 (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || 2519 di->d_spc_hardlimit > dqi->dqi_max_spc_limit) ||
2455 ((di->d_fieldmask & FS_DQ_ISOFT) && 2520 ((di->d_fieldmask & QC_INO_SOFT) &&
2456 (di->d_ino_softlimit > dqi->dqi_maxilimit)) || 2521 (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) ||
2457 ((di->d_fieldmask & FS_DQ_IHARD) && 2522 ((di->d_fieldmask & QC_INO_HARD) &&
2458 (di->d_ino_hardlimit > dqi->dqi_maxilimit))) 2523 (di->d_ino_hardlimit > dqi->dqi_max_ino_limit)))
2459 return -ERANGE; 2524 return -ERANGE;
2460 2525
2461 spin_lock(&dq_data_lock); 2526 spin_lock(&dq_data_lock);
2462 if (di->d_fieldmask & FS_DQ_BCOUNT) { 2527 if (di->d_fieldmask & QC_SPACE) {
2463 dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; 2528 dm->dqb_curspace = di->d_space - dm->dqb_rsvspace;
2464 check_blim = 1; 2529 check_blim = 1;
2465 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); 2530 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
2466 } 2531 }
2467 2532
2468 if (di->d_fieldmask & FS_DQ_BSOFT) 2533 if (di->d_fieldmask & QC_SPC_SOFT)
2469 dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); 2534 dm->dqb_bsoftlimit = di->d_spc_softlimit;
2470 if (di->d_fieldmask & FS_DQ_BHARD) 2535 if (di->d_fieldmask & QC_SPC_HARD)
2471 dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); 2536 dm->dqb_bhardlimit = di->d_spc_hardlimit;
2472 if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { 2537 if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) {
2473 check_blim = 1; 2538 check_blim = 1;
2474 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); 2539 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
2475 } 2540 }
2476 2541
2477 if (di->d_fieldmask & FS_DQ_ICOUNT) { 2542 if (di->d_fieldmask & QC_INO_COUNT) {
2478 dm->dqb_curinodes = di->d_icount; 2543 dm->dqb_curinodes = di->d_ino_count;
2479 check_ilim = 1; 2544 check_ilim = 1;
2480 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); 2545 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
2481 } 2546 }
2482 2547
2483 if (di->d_fieldmask & FS_DQ_ISOFT) 2548 if (di->d_fieldmask & QC_INO_SOFT)
2484 dm->dqb_isoftlimit = di->d_ino_softlimit; 2549 dm->dqb_isoftlimit = di->d_ino_softlimit;
2485 if (di->d_fieldmask & FS_DQ_IHARD) 2550 if (di->d_fieldmask & QC_INO_HARD)
2486 dm->dqb_ihardlimit = di->d_ino_hardlimit; 2551 dm->dqb_ihardlimit = di->d_ino_hardlimit;
2487 if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { 2552 if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) {
2488 check_ilim = 1; 2553 check_ilim = 1;
2489 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); 2554 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
2490 } 2555 }
2491 2556
2492 if (di->d_fieldmask & FS_DQ_BTIMER) { 2557 if (di->d_fieldmask & QC_SPC_TIMER) {
2493 dm->dqb_btime = di->d_btimer; 2558 dm->dqb_btime = di->d_spc_timer;
2494 check_blim = 1; 2559 check_blim = 1;
2495 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); 2560 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
2496 } 2561 }
2497 2562
2498 if (di->d_fieldmask & FS_DQ_ITIMER) { 2563 if (di->d_fieldmask & QC_INO_TIMER) {
2499 dm->dqb_itime = di->d_itimer; 2564 dm->dqb_itime = di->d_ino_timer;
2500 check_ilim = 1; 2565 check_ilim = 1;
2501 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); 2566 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
2502 } 2567 }
@@ -2506,7 +2571,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2506 dm->dqb_curspace < dm->dqb_bsoftlimit) { 2571 dm->dqb_curspace < dm->dqb_bsoftlimit) {
2507 dm->dqb_btime = 0; 2572 dm->dqb_btime = 0;
2508 clear_bit(DQ_BLKS_B, &dquot->dq_flags); 2573 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
2509 } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) 2574 } else if (!(di->d_fieldmask & QC_SPC_TIMER))
2510 /* Set grace only if user hasn't provided his own... */ 2575 /* Set grace only if user hasn't provided his own... */
2511 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; 2576 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
2512 } 2577 }
@@ -2515,7 +2580,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2515 dm->dqb_curinodes < dm->dqb_isoftlimit) { 2580 dm->dqb_curinodes < dm->dqb_isoftlimit) {
2516 dm->dqb_itime = 0; 2581 dm->dqb_itime = 0;
2517 clear_bit(DQ_INODES_B, &dquot->dq_flags); 2582 clear_bit(DQ_INODES_B, &dquot->dq_flags);
2518 } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) 2583 } else if (!(di->d_fieldmask & QC_INO_TIMER))
2519 /* Set grace only if user hasn't provided his own... */ 2584 /* Set grace only if user hasn't provided his own... */
2520 dm->dqb_itime = get_seconds() + dqi->dqi_igrace; 2585 dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
2521 } 2586 }
@@ -2531,7 +2596,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2531} 2596}
2532 2597
2533int dquot_set_dqblk(struct super_block *sb, struct kqid qid, 2598int dquot_set_dqblk(struct super_block *sb, struct kqid qid,
2534 struct fs_disk_quota *di) 2599 struct qc_dqblk *di)
2535{ 2600{
2536 struct dquot *dquot; 2601 struct dquot *dquot;
2537 int rc; 2602 int rc;
@@ -2582,6 +2647,14 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
2582 goto out; 2647 goto out;
2583 } 2648 }
2584 mi = sb_dqopt(sb)->info + type; 2649 mi = sb_dqopt(sb)->info + type;
2650 if (ii->dqi_valid & IIF_FLAGS) {
2651 if (ii->dqi_flags & ~DQF_SETINFO_MASK ||
2652 (ii->dqi_flags & DQF_ROOT_SQUASH &&
2653 mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
2654 err = -EINVAL;
2655 goto out;
2656 }
2657 }
2585 spin_lock(&dq_data_lock); 2658 spin_lock(&dq_data_lock);
2586 if (ii->dqi_valid & IIF_BGRACE) 2659 if (ii->dqi_valid & IIF_BGRACE)
2587 mi->dqi_bgrace = ii->dqi_bgrace; 2660 mi->dqi_bgrace = ii->dqi_bgrace;
@@ -2611,6 +2684,17 @@ const struct quotactl_ops dquot_quotactl_ops = {
2611}; 2684};
2612EXPORT_SYMBOL(dquot_quotactl_ops); 2685EXPORT_SYMBOL(dquot_quotactl_ops);
2613 2686
2687const struct quotactl_ops dquot_quotactl_sysfile_ops = {
2688 .quota_enable = dquot_quota_enable,
2689 .quota_disable = dquot_quota_disable,
2690 .quota_sync = dquot_quota_sync,
2691 .get_info = dquot_get_dqinfo,
2692 .set_info = dquot_set_dqinfo,
2693 .get_dqblk = dquot_get_dqblk,
2694 .set_dqblk = dquot_set_dqblk
2695};
2696EXPORT_SYMBOL(dquot_quotactl_sysfile_ops);
2697
2614static int do_proc_dqstats(struct ctl_table *table, int write, 2698static int do_proc_dqstats(struct ctl_table *table, int write,
2615 void __user *buffer, size_t *lenp, loff_t *ppos) 2699 void __user *buffer, size_t *lenp, loff_t *ppos)
2616{ 2700{
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2aa4151f99d2..d14a799c7785 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -66,18 +66,40 @@ static int quota_sync_all(int type)
66 return ret; 66 return ret;
67} 67}
68 68
69unsigned int qtype_enforce_flag(int type)
70{
71 switch (type) {
72 case USRQUOTA:
73 return FS_QUOTA_UDQ_ENFD;
74 case GRPQUOTA:
75 return FS_QUOTA_GDQ_ENFD;
76 case PRJQUOTA:
77 return FS_QUOTA_PDQ_ENFD;
78 }
79 return 0;
80}
81
69static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, 82static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
70 struct path *path) 83 struct path *path)
71{ 84{
72 if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta) 85 if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
73 return -ENOSYS; 86 return -ENOSYS;
74 if (sb->s_qcop->quota_on_meta) 87 if (sb->s_qcop->quota_enable)
75 return sb->s_qcop->quota_on_meta(sb, type, id); 88 return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type));
76 if (IS_ERR(path)) 89 if (IS_ERR(path))
77 return PTR_ERR(path); 90 return PTR_ERR(path);
78 return sb->s_qcop->quota_on(sb, type, id, path); 91 return sb->s_qcop->quota_on(sb, type, id, path);
79} 92}
80 93
94static int quota_quotaoff(struct super_block *sb, int type)
95{
96 if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable)
97 return -ENOSYS;
98 if (sb->s_qcop->quota_disable)
99 return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type));
100 return sb->s_qcop->quota_off(sb, type);
101}
102
81static int quota_getfmt(struct super_block *sb, int type, void __user *addr) 103static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
82{ 104{
83 __u32 fmt; 105 __u32 fmt;
@@ -118,17 +140,27 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
118 return sb->s_qcop->set_info(sb, type, &info); 140 return sb->s_qcop->set_info(sb, type, &info);
119} 141}
120 142
121static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) 143static inline qsize_t qbtos(qsize_t blocks)
144{
145 return blocks << QIF_DQBLKSIZE_BITS;
146}
147
148static inline qsize_t stoqb(qsize_t space)
149{
150 return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
151}
152
153static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src)
122{ 154{
123 memset(dst, 0, sizeof(*dst)); 155 memset(dst, 0, sizeof(*dst));
124 dst->dqb_bhardlimit = src->d_blk_hardlimit; 156 dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit);
125 dst->dqb_bsoftlimit = src->d_blk_softlimit; 157 dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit);
126 dst->dqb_curspace = src->d_bcount; 158 dst->dqb_curspace = src->d_space;
127 dst->dqb_ihardlimit = src->d_ino_hardlimit; 159 dst->dqb_ihardlimit = src->d_ino_hardlimit;
128 dst->dqb_isoftlimit = src->d_ino_softlimit; 160 dst->dqb_isoftlimit = src->d_ino_softlimit;
129 dst->dqb_curinodes = src->d_icount; 161 dst->dqb_curinodes = src->d_ino_count;
130 dst->dqb_btime = src->d_btimer; 162 dst->dqb_btime = src->d_spc_timer;
131 dst->dqb_itime = src->d_itimer; 163 dst->dqb_itime = src->d_ino_timer;
132 dst->dqb_valid = QIF_ALL; 164 dst->dqb_valid = QIF_ALL;
133} 165}
134 166
@@ -136,7 +168,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
136 void __user *addr) 168 void __user *addr)
137{ 169{
138 struct kqid qid; 170 struct kqid qid;
139 struct fs_disk_quota fdq; 171 struct qc_dqblk fdq;
140 struct if_dqblk idq; 172 struct if_dqblk idq;
141 int ret; 173 int ret;
142 174
@@ -154,36 +186,36 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
154 return 0; 186 return 0;
155} 187}
156 188
157static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) 189static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
158{ 190{
159 dst->d_blk_hardlimit = src->dqb_bhardlimit; 191 dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
160 dst->d_blk_softlimit = src->dqb_bsoftlimit; 192 dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit);
161 dst->d_bcount = src->dqb_curspace; 193 dst->d_space = src->dqb_curspace;
162 dst->d_ino_hardlimit = src->dqb_ihardlimit; 194 dst->d_ino_hardlimit = src->dqb_ihardlimit;
163 dst->d_ino_softlimit = src->dqb_isoftlimit; 195 dst->d_ino_softlimit = src->dqb_isoftlimit;
164 dst->d_icount = src->dqb_curinodes; 196 dst->d_ino_count = src->dqb_curinodes;
165 dst->d_btimer = src->dqb_btime; 197 dst->d_spc_timer = src->dqb_btime;
166 dst->d_itimer = src->dqb_itime; 198 dst->d_ino_timer = src->dqb_itime;
167 199
168 dst->d_fieldmask = 0; 200 dst->d_fieldmask = 0;
169 if (src->dqb_valid & QIF_BLIMITS) 201 if (src->dqb_valid & QIF_BLIMITS)
170 dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; 202 dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD;
171 if (src->dqb_valid & QIF_SPACE) 203 if (src->dqb_valid & QIF_SPACE)
172 dst->d_fieldmask |= FS_DQ_BCOUNT; 204 dst->d_fieldmask |= QC_SPACE;
173 if (src->dqb_valid & QIF_ILIMITS) 205 if (src->dqb_valid & QIF_ILIMITS)
174 dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; 206 dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD;
175 if (src->dqb_valid & QIF_INODES) 207 if (src->dqb_valid & QIF_INODES)
176 dst->d_fieldmask |= FS_DQ_ICOUNT; 208 dst->d_fieldmask |= QC_INO_COUNT;
177 if (src->dqb_valid & QIF_BTIME) 209 if (src->dqb_valid & QIF_BTIME)
178 dst->d_fieldmask |= FS_DQ_BTIMER; 210 dst->d_fieldmask |= QC_SPC_TIMER;
179 if (src->dqb_valid & QIF_ITIME) 211 if (src->dqb_valid & QIF_ITIME)
180 dst->d_fieldmask |= FS_DQ_ITIMER; 212 dst->d_fieldmask |= QC_INO_TIMER;
181} 213}
182 214
183static int quota_setquota(struct super_block *sb, int type, qid_t id, 215static int quota_setquota(struct super_block *sb, int type, qid_t id,
184 void __user *addr) 216 void __user *addr)
185{ 217{
186 struct fs_disk_quota fdq; 218 struct qc_dqblk fdq;
187 struct if_dqblk idq; 219 struct if_dqblk idq;
188 struct kqid qid; 220 struct kqid qid;
189 221
@@ -198,15 +230,26 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
198 return sb->s_qcop->set_dqblk(sb, qid, &fdq); 230 return sb->s_qcop->set_dqblk(sb, qid, &fdq);
199} 231}
200 232
201static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) 233static int quota_enable(struct super_block *sb, void __user *addr)
234{
235 __u32 flags;
236
237 if (copy_from_user(&flags, addr, sizeof(flags)))
238 return -EFAULT;
239 if (!sb->s_qcop->quota_enable)
240 return -ENOSYS;
241 return sb->s_qcop->quota_enable(sb, flags);
242}
243
244static int quota_disable(struct super_block *sb, void __user *addr)
202{ 245{
203 __u32 flags; 246 __u32 flags;
204 247
205 if (copy_from_user(&flags, addr, sizeof(flags))) 248 if (copy_from_user(&flags, addr, sizeof(flags)))
206 return -EFAULT; 249 return -EFAULT;
207 if (!sb->s_qcop->set_xstate) 250 if (!sb->s_qcop->quota_disable)
208 return -ENOSYS; 251 return -ENOSYS;
209 return sb->s_qcop->set_xstate(sb, flags, cmd); 252 return sb->s_qcop->quota_disable(sb, flags);
210} 253}
211 254
212static int quota_getxstate(struct super_block *sb, void __user *addr) 255static int quota_getxstate(struct super_block *sb, void __user *addr)
@@ -247,10 +290,78 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
247 return ret; 290 return ret;
248} 291}
249 292
293/*
294 * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them
295 * out of there as xfsprogs rely on definitions being in that header file. So
296 * just define same functions here for quota purposes.
297 */
298#define XFS_BB_SHIFT 9
299
300static inline u64 quota_bbtob(u64 blocks)
301{
302 return blocks << XFS_BB_SHIFT;
303}
304
305static inline u64 quota_btobb(u64 bytes)
306{
307 return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT;
308}
309
310static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
311{
312 dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit);
313 dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit);
314 dst->d_ino_hardlimit = src->d_ino_hardlimit;
315 dst->d_ino_softlimit = src->d_ino_softlimit;
316 dst->d_space = quota_bbtob(src->d_bcount);
317 dst->d_ino_count = src->d_icount;
318 dst->d_ino_timer = src->d_itimer;
319 dst->d_spc_timer = src->d_btimer;
320 dst->d_ino_warns = src->d_iwarns;
321 dst->d_spc_warns = src->d_bwarns;
322 dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit);
323 dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit);
324 dst->d_rt_space = quota_bbtob(src->d_rtbcount);
325 dst->d_rt_spc_timer = src->d_rtbtimer;
326 dst->d_rt_spc_warns = src->d_rtbwarns;
327 dst->d_fieldmask = 0;
328 if (src->d_fieldmask & FS_DQ_ISOFT)
329 dst->d_fieldmask |= QC_INO_SOFT;
330 if (src->d_fieldmask & FS_DQ_IHARD)
331 dst->d_fieldmask |= QC_INO_HARD;
332 if (src->d_fieldmask & FS_DQ_BSOFT)
333 dst->d_fieldmask |= QC_SPC_SOFT;
334 if (src->d_fieldmask & FS_DQ_BHARD)
335 dst->d_fieldmask |= QC_SPC_HARD;
336 if (src->d_fieldmask & FS_DQ_RTBSOFT)
337 dst->d_fieldmask |= QC_RT_SPC_SOFT;
338 if (src->d_fieldmask & FS_DQ_RTBHARD)
339 dst->d_fieldmask |= QC_RT_SPC_HARD;
340 if (src->d_fieldmask & FS_DQ_BTIMER)
341 dst->d_fieldmask |= QC_SPC_TIMER;
342 if (src->d_fieldmask & FS_DQ_ITIMER)
343 dst->d_fieldmask |= QC_INO_TIMER;
344 if (src->d_fieldmask & FS_DQ_RTBTIMER)
345 dst->d_fieldmask |= QC_RT_SPC_TIMER;
346 if (src->d_fieldmask & FS_DQ_BWARNS)
347 dst->d_fieldmask |= QC_SPC_WARNS;
348 if (src->d_fieldmask & FS_DQ_IWARNS)
349 dst->d_fieldmask |= QC_INO_WARNS;
350 if (src->d_fieldmask & FS_DQ_RTBWARNS)
351 dst->d_fieldmask |= QC_RT_SPC_WARNS;
352 if (src->d_fieldmask & FS_DQ_BCOUNT)
353 dst->d_fieldmask |= QC_SPACE;
354 if (src->d_fieldmask & FS_DQ_ICOUNT)
355 dst->d_fieldmask |= QC_INO_COUNT;
356 if (src->d_fieldmask & FS_DQ_RTBCOUNT)
357 dst->d_fieldmask |= QC_RT_SPACE;
358}
359
250static int quota_setxquota(struct super_block *sb, int type, qid_t id, 360static int quota_setxquota(struct super_block *sb, int type, qid_t id,
251 void __user *addr) 361 void __user *addr)
252{ 362{
253 struct fs_disk_quota fdq; 363 struct fs_disk_quota fdq;
364 struct qc_dqblk qdq;
254 struct kqid qid; 365 struct kqid qid;
255 366
256 if (copy_from_user(&fdq, addr, sizeof(fdq))) 367 if (copy_from_user(&fdq, addr, sizeof(fdq)))
@@ -260,13 +371,44 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
260 qid = make_kqid(current_user_ns(), type, id); 371 qid = make_kqid(current_user_ns(), type, id);
261 if (!qid_valid(qid)) 372 if (!qid_valid(qid))
262 return -EINVAL; 373 return -EINVAL;
263 return sb->s_qcop->set_dqblk(sb, qid, &fdq); 374 copy_from_xfs_dqblk(&qdq, &fdq);
375 return sb->s_qcop->set_dqblk(sb, qid, &qdq);
376}
377
378static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src,
379 int type, qid_t id)
380{
381 memset(dst, 0, sizeof(*dst));
382 dst->d_version = FS_DQUOT_VERSION;
383 dst->d_id = id;
384 if (type == USRQUOTA)
385 dst->d_flags = FS_USER_QUOTA;
386 else if (type == PRJQUOTA)
387 dst->d_flags = FS_PROJ_QUOTA;
388 else
389 dst->d_flags = FS_GROUP_QUOTA;
390 dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit);
391 dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit);
392 dst->d_ino_hardlimit = src->d_ino_hardlimit;
393 dst->d_ino_softlimit = src->d_ino_softlimit;
394 dst->d_bcount = quota_btobb(src->d_space);
395 dst->d_icount = src->d_ino_count;
396 dst->d_itimer = src->d_ino_timer;
397 dst->d_btimer = src->d_spc_timer;
398 dst->d_iwarns = src->d_ino_warns;
399 dst->d_bwarns = src->d_spc_warns;
400 dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit);
401 dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit);
402 dst->d_rtbcount = quota_btobb(src->d_rt_space);
403 dst->d_rtbtimer = src->d_rt_spc_timer;
404 dst->d_rtbwarns = src->d_rt_spc_warns;
264} 405}
265 406
266static int quota_getxquota(struct super_block *sb, int type, qid_t id, 407static int quota_getxquota(struct super_block *sb, int type, qid_t id,
267 void __user *addr) 408 void __user *addr)
268{ 409{
269 struct fs_disk_quota fdq; 410 struct fs_disk_quota fdq;
411 struct qc_dqblk qdq;
270 struct kqid qid; 412 struct kqid qid;
271 int ret; 413 int ret;
272 414
@@ -275,8 +417,11 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
275 qid = make_kqid(current_user_ns(), type, id); 417 qid = make_kqid(current_user_ns(), type, id);
276 if (!qid_valid(qid)) 418 if (!qid_valid(qid))
277 return -EINVAL; 419 return -EINVAL;
278 ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); 420 ret = sb->s_qcop->get_dqblk(sb, qid, &qdq);
279 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) 421 if (ret)
422 return ret;
423 copy_to_xfs_dqblk(&fdq, &qdq, type, id);
424 if (copy_to_user(addr, &fdq, sizeof(fdq)))
280 return -EFAULT; 425 return -EFAULT;
281 return ret; 426 return ret;
282} 427}
@@ -317,9 +462,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
317 case Q_QUOTAON: 462 case Q_QUOTAON:
318 return quota_quotaon(sb, type, cmd, id, path); 463 return quota_quotaon(sb, type, cmd, id, path);
319 case Q_QUOTAOFF: 464 case Q_QUOTAOFF:
320 if (!sb->s_qcop->quota_off) 465 return quota_quotaoff(sb, type);
321 return -ENOSYS;
322 return sb->s_qcop->quota_off(sb, type);
323 case Q_GETFMT: 466 case Q_GETFMT:
324 return quota_getfmt(sb, type, addr); 467 return quota_getfmt(sb, type, addr);
325 case Q_GETINFO: 468 case Q_GETINFO:
@@ -335,8 +478,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
335 return -ENOSYS; 478 return -ENOSYS;
336 return sb->s_qcop->quota_sync(sb, type); 479 return sb->s_qcop->quota_sync(sb, type);
337 case Q_XQUOTAON: 480 case Q_XQUOTAON:
481 return quota_enable(sb, addr);
338 case Q_XQUOTAOFF: 482 case Q_XQUOTAOFF:
339 return quota_setxstate(sb, cmd, addr); 483 return quota_disable(sb, addr);
340 case Q_XQUOTARM: 484 case Q_XQUOTARM:
341 return quota_rmxquota(sb, addr); 485 return quota_rmxquota(sb, addr);
342 case Q_XGETQSTAT: 486 case Q_XGETQSTAT:
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 469c6848b322..8fe79beced5c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -169,8 +169,8 @@ static int v1_read_file_info(struct super_block *sb, int type)
169 } 169 }
170 ret = 0; 170 ret = 0;
171 /* limits are stored as unsigned 32-bit data */ 171 /* limits are stored as unsigned 32-bit data */
172 dqopt->info[type].dqi_maxblimit = 0xffffffff; 172 dqopt->info[type].dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS;
173 dqopt->info[type].dqi_maxilimit = 0xffffffff; 173 dqopt->info[type].dqi_max_ino_limit = 0xffffffff;
174 dqopt->info[type].dqi_igrace = 174 dqopt->info[type].dqi_igrace =
175 dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; 175 dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
176 dqopt->info[type].dqi_bgrace = 176 dqopt->info[type].dqi_bgrace =
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 02751ec695c5..9cb10d7197f7 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -117,16 +117,17 @@ static int v2_read_file_info(struct super_block *sb, int type)
117 qinfo = info->dqi_priv; 117 qinfo = info->dqi_priv;
118 if (version == 0) { 118 if (version == 0) {
119 /* limits are stored as unsigned 32-bit data */ 119 /* limits are stored as unsigned 32-bit data */
120 info->dqi_maxblimit = 0xffffffff; 120 info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS;
121 info->dqi_maxilimit = 0xffffffff; 121 info->dqi_max_ino_limit = 0xffffffff;
122 } else { 122 } else {
123 /* used space is stored as unsigned 64-bit value */ 123 /* used space is stored as unsigned 64-bit value in bytes */
124 info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */ 124 info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */
125 info->dqi_maxilimit = 0xffffffffffffffffULL; 125 info->dqi_max_ino_limit = 0xffffffffffffffffULL;
126 } 126 }
127 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 127 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
128 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 128 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
129 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); 129 /* No flags currently supported */
130 info->dqi_flags = 0;
130 qinfo->dqi_sb = sb; 131 qinfo->dqi_sb = sb;
131 qinfo->dqi_type = type; 132 qinfo->dqi_type = type;
132 qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 133 qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
@@ -157,7 +158,8 @@ static int v2_write_file_info(struct super_block *sb, int type)
157 info->dqi_flags &= ~DQF_INFO_DIRTY; 158 info->dqi_flags &= ~DQF_INFO_DIRTY;
158 dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); 159 dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
159 dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); 160 dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
160 dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); 161 /* No flags currently supported */
162 dinfo.dqi_flags = cpu_to_le32(0);
161 spin_unlock(&dq_data_lock); 163 spin_unlock(&dq_data_lock);
162 dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); 164 dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
163 dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); 165 dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index bbafbde3471a..f6ab41b39612 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -34,7 +34,14 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
34 unsigned long flags); 34 unsigned long flags);
35static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); 35static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
36 36
37static unsigned ramfs_mmap_capabilities(struct file *file)
38{
39 return NOMMU_MAP_DIRECT | NOMMU_MAP_COPY | NOMMU_MAP_READ |
40 NOMMU_MAP_WRITE | NOMMU_MAP_EXEC;
41}
42
37const struct file_operations ramfs_file_operations = { 43const struct file_operations ramfs_file_operations = {
44 .mmap_capabilities = ramfs_mmap_capabilities,
38 .mmap = ramfs_nommu_mmap, 45 .mmap = ramfs_nommu_mmap,
39 .get_unmapped_area = ramfs_nommu_get_unmapped_area, 46 .get_unmapped_area = ramfs_nommu_get_unmapped_area,
40 .read = new_sync_read, 47 .read = new_sync_read,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index d365b1c4eb3c..889d558b4e05 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -50,14 +50,6 @@ static const struct address_space_operations ramfs_aops = {
50 .set_page_dirty = __set_page_dirty_no_writeback, 50 .set_page_dirty = __set_page_dirty_no_writeback,
51}; 51};
52 52
53static struct backing_dev_info ramfs_backing_dev_info = {
54 .name = "ramfs",
55 .ra_pages = 0, /* No readahead */
56 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
57 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
58 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
59};
60
61struct inode *ramfs_get_inode(struct super_block *sb, 53struct inode *ramfs_get_inode(struct super_block *sb,
62 const struct inode *dir, umode_t mode, dev_t dev) 54 const struct inode *dir, umode_t mode, dev_t dev)
63{ 55{
@@ -67,7 +59,6 @@ struct inode *ramfs_get_inode(struct super_block *sb,
67 inode->i_ino = get_next_ino(); 59 inode->i_ino = get_next_ino();
68 inode_init_owner(inode, dir, mode); 60 inode_init_owner(inode, dir, mode);
69 inode->i_mapping->a_ops = &ramfs_aops; 61 inode->i_mapping->a_ops = &ramfs_aops;
70 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
71 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); 62 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
72 mapping_set_unevictable(inode->i_mapping); 63 mapping_set_unevictable(inode->i_mapping);
73 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 64 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -267,19 +258,9 @@ static struct file_system_type ramfs_fs_type = {
267int __init init_ramfs_fs(void) 258int __init init_ramfs_fs(void)
268{ 259{
269 static unsigned long once; 260 static unsigned long once;
270 int err;
271 261
272 if (test_and_set_bit(0, &once)) 262 if (test_and_set_bit(0, &once))
273 return 0; 263 return 0;
274 264 return register_filesystem(&ramfs_fs_type);
275 err = bdi_init(&ramfs_backing_dev_info);
276 if (err)
277 return err;
278
279 err = register_filesystem(&ramfs_fs_type);
280 if (err)
281 bdi_destroy(&ramfs_backing_dev_info);
282
283 return err;
284} 265}
285fs_initcall(init_ramfs_fs); 266fs_initcall(init_ramfs_fs);
diff --git a/fs/read_write.c b/fs/read_write.c
index c0805c93b6fa..4060691e78f7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
358 return retval; 358 return retval;
359 } 359 }
360 360
361 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 361 if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
362 retval = locks_mandatory_area( 362 retval = locks_mandatory_area(
363 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 363 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
364 inode, file, pos, count); 364 inode, file, pos, count);
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index ea06c7554860..7da9e2153953 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -70,6 +70,15 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
70 return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS; 70 return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS;
71} 71}
72 72
73static unsigned romfs_mmap_capabilities(struct file *file)
74{
75 struct mtd_info *mtd = file_inode(file)->i_sb->s_mtd;
76
77 if (!mtd)
78 return NOMMU_MAP_COPY;
79 return mtd_mmap_capabilities(mtd);
80}
81
73const struct file_operations romfs_ro_fops = { 82const struct file_operations romfs_ro_fops = {
74 .llseek = generic_file_llseek, 83 .llseek = generic_file_llseek,
75 .read = new_sync_read, 84 .read = new_sync_read,
@@ -77,4 +86,5 @@ const struct file_operations romfs_ro_fops = {
77 .splice_read = generic_file_splice_read, 86 .splice_read = generic_file_splice_read,
78 .mmap = romfs_mmap, 87 .mmap = romfs_mmap,
79 .get_unmapped_area = romfs_get_unmapped_area, 88 .get_unmapped_area = romfs_get_unmapped_area,
89 .mmap_capabilities = romfs_mmap_capabilities,
80}; 90};
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e98dd88197d5..268733cda397 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -355,9 +355,6 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
355 case ROMFH_REG: 355 case ROMFH_REG:
356 i->i_fop = &romfs_ro_fops; 356 i->i_fop = &romfs_ro_fops;
357 i->i_data.a_ops = &romfs_aops; 357 i->i_data.a_ops = &romfs_aops;
358 if (i->i_sb->s_mtd)
359 i->i_data.backing_dev_info =
360 i->i_sb->s_mtd->backing_dev_info;
361 if (nextfh & ROMFH_EXEC) 358 if (nextfh & ROMFH_EXEC)
362 mode |= S_IXUGO; 359 mode |= S_IXUGO;
363 break; 360 break;
diff --git a/fs/super.c b/fs/super.c
index eae088f6aaae..05a021638b11 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,8 +36,8 @@
36#include "internal.h" 36#include "internal.h"
37 37
38 38
39LIST_HEAD(super_blocks); 39static LIST_HEAD(super_blocks);
40DEFINE_SPINLOCK(sb_lock); 40static DEFINE_SPINLOCK(sb_lock);
41 41
42static char *sb_writers_name[SB_FREEZE_LEVELS] = { 42static char *sb_writers_name[SB_FREEZE_LEVELS] = {
43 "sb_writers", 43 "sb_writers",
@@ -185,8 +185,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
185 } 185 }
186 init_waitqueue_head(&s->s_writers.wait); 186 init_waitqueue_head(&s->s_writers.wait);
187 init_waitqueue_head(&s->s_writers.wait_unfrozen); 187 init_waitqueue_head(&s->s_writers.wait_unfrozen);
188 s->s_bdi = &noop_backing_dev_info;
188 s->s_flags = flags; 189 s->s_flags = flags;
189 s->s_bdi = &default_backing_dev_info;
190 INIT_HLIST_NODE(&s->s_instances); 190 INIT_HLIST_NODE(&s->s_instances);
191 INIT_HLIST_BL_HEAD(&s->s_anon); 191 INIT_HLIST_BL_HEAD(&s->s_anon);
192 INIT_LIST_HEAD(&s->s_inodes); 192 INIT_LIST_HEAD(&s->s_inodes);
@@ -863,10 +863,7 @@ EXPORT_SYMBOL(free_anon_bdev);
863 863
864int set_anon_super(struct super_block *s, void *data) 864int set_anon_super(struct super_block *s, void *data)
865{ 865{
866 int error = get_anon_bdev(&s->s_dev); 866 return get_anon_bdev(&s->s_dev);
867 if (!error)
868 s->s_bdi = &noop_backing_dev_info;
869 return error;
870} 867}
871 868
872EXPORT_SYMBOL(set_anon_super); 869EXPORT_SYMBOL(set_anon_super);
@@ -1111,7 +1108,6 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
1111 sb = root->d_sb; 1108 sb = root->d_sb;
1112 BUG_ON(!sb); 1109 BUG_ON(!sb);
1113 WARN_ON(!sb->s_bdi); 1110 WARN_ON(!sb->s_bdi);
1114 WARN_ON(sb->s_bdi == &default_backing_dev_info);
1115 sb->s_flags |= MS_BORN; 1111 sb->s_flags |= MS_BORN;
1116 1112
1117 error = security_sb_kern_mount(sb, flags, secdata); 1113 error = security_sb_kern_mount(sb, flags, secdata);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ea41649e4ca5..c49b1981ac95 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -108,8 +108,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
108 inode->i_mtime = inode->i_atime = inode->i_ctime = 108 inode->i_mtime = inode->i_atime = inode->i_ctime =
109 ubifs_current_time(inode); 109 ubifs_current_time(inode);
110 inode->i_mapping->nrpages = 0; 110 inode->i_mapping->nrpages = 0;
111 /* Disable readahead */
112 inode->i_mapping->backing_dev_info = &c->bdi;
113 111
114 switch (mode & S_IFMT) { 112 switch (mode & S_IFMT) {
115 case S_IFREG: 113 case S_IFREG:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 538519ee37d9..035e51011444 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1536,7 +1536,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = {
1536 .fault = filemap_fault, 1536 .fault = filemap_fault,
1537 .map_pages = filemap_map_pages, 1537 .map_pages = filemap_map_pages,
1538 .page_mkwrite = ubifs_vm_page_mkwrite, 1538 .page_mkwrite = ubifs_vm_page_mkwrite,
1539 .remap_pages = generic_file_remap_pages,
1540}; 1539};
1541 1540
1542static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) 1541static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 106bf20629ce..6197154f36ca 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -156,9 +156,6 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
156 if (err) 156 if (err)
157 goto out_invalid; 157 goto out_invalid;
158 158
159 /* Disable read-ahead */
160 inode->i_mapping->backing_dev_info = &c->bdi;
161
162 switch (inode->i_mode & S_IFMT) { 159 switch (inode->i_mode & S_IFMT) {
163 case S_IFREG: 160 case S_IFREG:
164 inode->i_mapping->a_ops = &ubifs_file_address_operations; 161 inode->i_mapping->a_ops = &ubifs_file_address_operations;
@@ -2017,7 +2014,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
2017 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 2014 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
2018 */ 2015 */
2019 c->bdi.name = "ubifs", 2016 c->bdi.name = "ubifs",
2020 c->bdi.capabilities = BDI_CAP_MAP_COPY; 2017 c->bdi.capabilities = 0;
2021 err = bdi_init(&c->bdi); 2018 err = bdi_init(&c->bdi);
2022 if (err) 2019 if (err)
2023 goto out_close; 2020 goto out_close;
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig
index 0e0e99bd6bce..c6e17a744c3b 100644
--- a/fs/udf/Kconfig
+++ b/fs/udf/Kconfig
@@ -2,10 +2,12 @@ config UDF_FS
2 tristate "UDF file system support" 2 tristate "UDF file system support"
3 select CRC_ITU_T 3 select CRC_ITU_T
4 help 4 help
5 This is the new file system used on some CD-ROMs and DVDs. Say Y if 5 This is a file system used on some CD-ROMs and DVDs. Since the
6 you intend to mount DVD discs or CDRW's written in packet mode, or 6 file system is supported by multiple operating systems and is more
7 if written to by other UDF utilities, such as DirectCD. 7 compatible with standard unix file systems, it is also suitable for
8 Please read <file:Documentation/filesystems/udf.txt>. 8 removable USB disks. Say Y if you intend to mount DVD discs or CDRW's
9 written in packet mode, or if you want to use UDF for removable USB
10 disks. Please read <file:Documentation/filesystems/udf.txt>.
9 11
10 To compile this file system support as a module, choose M here: the 12 To compile this file system support as a module, choose M here: the
11 module will be called udf. 13 module will be called udf.
diff --git a/fs/udf/file.c b/fs/udf/file.c
index bb15771b92ae..08f3555fbeac 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -224,7 +224,7 @@ out:
224static int udf_release_file(struct inode *inode, struct file *filp) 224static int udf_release_file(struct inode *inode, struct file *filp)
225{ 225{
226 if (filp->f_mode & FMODE_WRITE && 226 if (filp->f_mode & FMODE_WRITE &&
227 atomic_read(&inode->i_writecount) > 1) { 227 atomic_read(&inode->i_writecount) == 1) {
228 /* 228 /*
229 * Grab i_mutex to avoid races with writes changing i_size 229 * Grab i_mutex to avoid races with writes changing i_size
230 * while we are running. 230 * while we are running.
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5bc71d9a674a..a445d599098d 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -750,7 +750,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
750 /* Are we beyond EOF? */ 750 /* Are we beyond EOF? */
751 if (etype == -1) { 751 if (etype == -1) {
752 int ret; 752 int ret;
753 isBeyondEOF = 1; 753 isBeyondEOF = true;
754 if (count) { 754 if (count) {
755 if (c) 755 if (c)
756 laarr[0] = laarr[1]; 756 laarr[0] = laarr[1];
@@ -792,7 +792,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
792 endnum = c + 1; 792 endnum = c + 1;
793 lastblock = 1; 793 lastblock = 1;
794 } else { 794 } else {
795 isBeyondEOF = 0; 795 isBeyondEOF = false;
796 endnum = startnum = ((count > 2) ? 2 : count); 796 endnum = startnum = ((count > 2) ? 2 : count);
797 797
798 /* if the current extent is in position 0, 798 /* if the current extent is in position 0,
@@ -1288,6 +1288,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
1288 struct kernel_lb_addr *iloc = &iinfo->i_location; 1288 struct kernel_lb_addr *iloc = &iinfo->i_location;
1289 unsigned int link_count; 1289 unsigned int link_count;
1290 unsigned int indirections = 0; 1290 unsigned int indirections = 0;
1291 int bs = inode->i_sb->s_blocksize;
1291 int ret = -EIO; 1292 int ret = -EIO;
1292 1293
1293reread: 1294reread:
@@ -1374,38 +1375,35 @@ reread:
1374 if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { 1375 if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) {
1375 iinfo->i_efe = 1; 1376 iinfo->i_efe = 1;
1376 iinfo->i_use = 0; 1377 iinfo->i_use = 0;
1377 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1378 ret = udf_alloc_i_data(inode, bs -
1378 sizeof(struct extendedFileEntry)); 1379 sizeof(struct extendedFileEntry));
1379 if (ret) 1380 if (ret)
1380 goto out; 1381 goto out;
1381 memcpy(iinfo->i_ext.i_data, 1382 memcpy(iinfo->i_ext.i_data,
1382 bh->b_data + sizeof(struct extendedFileEntry), 1383 bh->b_data + sizeof(struct extendedFileEntry),
1383 inode->i_sb->s_blocksize - 1384 bs - sizeof(struct extendedFileEntry));
1384 sizeof(struct extendedFileEntry));
1385 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { 1385 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) {
1386 iinfo->i_efe = 0; 1386 iinfo->i_efe = 0;
1387 iinfo->i_use = 0; 1387 iinfo->i_use = 0;
1388 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1388 ret = udf_alloc_i_data(inode, bs - sizeof(struct fileEntry));
1389 sizeof(struct fileEntry));
1390 if (ret) 1389 if (ret)
1391 goto out; 1390 goto out;
1392 memcpy(iinfo->i_ext.i_data, 1391 memcpy(iinfo->i_ext.i_data,
1393 bh->b_data + sizeof(struct fileEntry), 1392 bh->b_data + sizeof(struct fileEntry),
1394 inode->i_sb->s_blocksize - sizeof(struct fileEntry)); 1393 bs - sizeof(struct fileEntry));
1395 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { 1394 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) {
1396 iinfo->i_efe = 0; 1395 iinfo->i_efe = 0;
1397 iinfo->i_use = 1; 1396 iinfo->i_use = 1;
1398 iinfo->i_lenAlloc = le32_to_cpu( 1397 iinfo->i_lenAlloc = le32_to_cpu(
1399 ((struct unallocSpaceEntry *)bh->b_data)-> 1398 ((struct unallocSpaceEntry *)bh->b_data)->
1400 lengthAllocDescs); 1399 lengthAllocDescs);
1401 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1400 ret = udf_alloc_i_data(inode, bs -
1402 sizeof(struct unallocSpaceEntry)); 1401 sizeof(struct unallocSpaceEntry));
1403 if (ret) 1402 if (ret)
1404 goto out; 1403 goto out;
1405 memcpy(iinfo->i_ext.i_data, 1404 memcpy(iinfo->i_ext.i_data,
1406 bh->b_data + sizeof(struct unallocSpaceEntry), 1405 bh->b_data + sizeof(struct unallocSpaceEntry),
1407 inode->i_sb->s_blocksize - 1406 bs - sizeof(struct unallocSpaceEntry));
1408 sizeof(struct unallocSpaceEntry));
1409 return 0; 1407 return 0;
1410 } 1408 }
1411 1409
@@ -1489,6 +1487,15 @@ reread:
1489 } 1487 }
1490 inode->i_generation = iinfo->i_unique; 1488 inode->i_generation = iinfo->i_unique;
1491 1489
1490 /*
1491 * Sanity check length of allocation descriptors and extended attrs to
1492 * avoid integer overflows
1493 */
1494 if (iinfo->i_lenEAttr > bs || iinfo->i_lenAlloc > bs)
1495 goto out;
1496 /* Now do exact checks */
1497 if (udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc > bs)
1498 goto out;
1492 /* Sanity checks for files in ICB so that we don't get confused later */ 1499 /* Sanity checks for files in ICB so that we don't get confused later */
1493 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1500 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1494 /* 1501 /*
@@ -1498,8 +1505,7 @@ reread:
1498 if (iinfo->i_lenAlloc != inode->i_size) 1505 if (iinfo->i_lenAlloc != inode->i_size)
1499 goto out; 1506 goto out;
1500 /* File in ICB has to fit in there... */ 1507 /* File in ICB has to fit in there... */
1501 if (inode->i_size > inode->i_sb->s_blocksize - 1508 if (inode->i_size > bs - udf_file_entry_alloc_offset(inode))
1502 udf_file_entry_alloc_offset(inode))
1503 goto out; 1509 goto out;
1504 } 1510 }
1505 1511
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3ccb2f11fc76..f169411c4ea0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1599,7 +1599,7 @@ static noinline int udf_process_sequence(
1599 struct udf_vds_record *curr; 1599 struct udf_vds_record *curr;
1600 struct generic_desc *gd; 1600 struct generic_desc *gd;
1601 struct volDescPtr *vdp; 1601 struct volDescPtr *vdp;
1602 int done = 0; 1602 bool done = false;
1603 uint32_t vdsn; 1603 uint32_t vdsn;
1604 uint16_t ident; 1604 uint16_t ident;
1605 long next_s = 0, next_e = 0; 1605 long next_s = 0, next_e = 0;
@@ -1680,7 +1680,7 @@ static noinline int udf_process_sequence(
1680 lastblock = next_e; 1680 lastblock = next_e;
1681 next_s = next_e = 0; 1681 next_s = next_e = 0;
1682 } else 1682 } else
1683 done = 1; 1683 done = true;
1684 break; 1684 break;
1685 } 1685 }
1686 brelse(bh); 1686 brelse(bh);
@@ -2300,6 +2300,7 @@ static void udf_put_super(struct super_block *sb)
2300 udf_close_lvid(sb); 2300 udf_close_lvid(sb);
2301 brelse(sbi->s_lvid_bh); 2301 brelse(sbi->s_lvid_bh);
2302 udf_sb_free_partitions(sb); 2302 udf_sb_free_partitions(sb);
2303 mutex_destroy(&sbi->s_alloc_mutex);
2303 kfree(sb->s_fs_info); 2304 kfree(sb->s_fs_info);
2304 sb->s_fs_info = NULL; 2305 sb->s_fs_info = NULL;
2305} 2306}
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 53e95b2a1369..a7a3a63bb360 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -91,16 +91,6 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
91 return ptr; 91 return ptr;
92} 92}
93 93
94void
95kmem_free(const void *ptr)
96{
97 if (!is_vmalloc_addr(ptr)) {
98 kfree(ptr);
99 } else {
100 vfree(ptr);
101 }
102}
103
104void * 94void *
105kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, 95kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
106 xfs_km_flags_t flags) 96 xfs_km_flags_t flags)
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 64db0e53edea..cc6b768fc068 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -63,7 +63,10 @@ kmem_flags_convert(xfs_km_flags_t flags)
63extern void *kmem_alloc(size_t, xfs_km_flags_t); 63extern void *kmem_alloc(size_t, xfs_km_flags_t);
64extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); 64extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
65extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); 65extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
66extern void kmem_free(const void *); 66static inline void kmem_free(const void *ptr)
67{
68 kvfree(ptr);
69}
67 70
68 71
69extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); 72extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 5d38e8b8a913..15105dbc9e28 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -403,7 +403,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
403 if (!xfs_sb_version_hasattr2(&mp->m_sb)) { 403 if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
404 xfs_sb_version_addattr2(&mp->m_sb); 404 xfs_sb_version_addattr2(&mp->m_sb);
405 spin_unlock(&mp->m_sb_lock); 405 spin_unlock(&mp->m_sb_lock);
406 xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); 406 xfs_log_sb(tp);
407 } else 407 } else
408 spin_unlock(&mp->m_sb_lock); 408 spin_unlock(&mp->m_sb_lock);
409 } 409 }
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b5eb4743f75a..61ec015dca16 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -973,7 +973,11 @@ xfs_bmap_local_to_extents(
973 *firstblock = args.fsbno; 973 *firstblock = args.fsbno;
974 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 974 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
975 975
976 /* initialise the block and copy the data */ 976 /*
977 * Initialise the block and copy the data
978 *
979 * Note: init_fn must set the buffer log item type correctly!
980 */
977 init_fn(tp, bp, ip, ifp); 981 init_fn(tp, bp, ip, ifp);
978 982
979 /* account for the change in fork size and log everything */ 983 /* account for the change in fork size and log everything */
@@ -1221,22 +1225,20 @@ xfs_bmap_add_attrfork(
1221 goto bmap_cancel; 1225 goto bmap_cancel;
1222 if (!xfs_sb_version_hasattr(&mp->m_sb) || 1226 if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1223 (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { 1227 (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1224 __int64_t sbfields = 0; 1228 bool log_sb = false;
1225 1229
1226 spin_lock(&mp->m_sb_lock); 1230 spin_lock(&mp->m_sb_lock);
1227 if (!xfs_sb_version_hasattr(&mp->m_sb)) { 1231 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1228 xfs_sb_version_addattr(&mp->m_sb); 1232 xfs_sb_version_addattr(&mp->m_sb);
1229 sbfields |= XFS_SB_VERSIONNUM; 1233 log_sb = true;
1230 } 1234 }
1231 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) { 1235 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1232 xfs_sb_version_addattr2(&mp->m_sb); 1236 xfs_sb_version_addattr2(&mp->m_sb);
1233 sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2); 1237 log_sb = true;
1234 } 1238 }
1235 if (sbfields) { 1239 spin_unlock(&mp->m_sb_lock);
1236 spin_unlock(&mp->m_sb_lock); 1240 if (log_sb)
1237 xfs_mod_sb(tp, sbfields); 1241 xfs_log_sb(tp);
1238 } else
1239 spin_unlock(&mp->m_sb_lock);
1240 } 1242 }
1241 1243
1242 error = xfs_bmap_finish(&tp, &flist, &committed); 1244 error = xfs_bmap_finish(&tp, &flist, &committed);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 44db6db86402..b9d8a499d2c4 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -28,6 +28,37 @@ struct xfs_trans;
28extern kmem_zone_t *xfs_bmap_free_item_zone; 28extern kmem_zone_t *xfs_bmap_free_item_zone;
29 29
30/* 30/*
31 * Argument structure for xfs_bmap_alloc.
32 */
33struct xfs_bmalloca {
34 xfs_fsblock_t *firstblock; /* i/o first block allocated */
35 struct xfs_bmap_free *flist; /* bmap freelist */
36 struct xfs_trans *tp; /* transaction pointer */
37 struct xfs_inode *ip; /* incore inode pointer */
38 struct xfs_bmbt_irec prev; /* extent before the new one */
39 struct xfs_bmbt_irec got; /* extent after, or delayed */
40
41 xfs_fileoff_t offset; /* offset in file filling in */
42 xfs_extlen_t length; /* i/o length asked/allocated */
43 xfs_fsblock_t blkno; /* starting block of new extent */
44
45 struct xfs_btree_cur *cur; /* btree cursor */
46 xfs_extnum_t idx; /* current extent index */
47 int nallocs;/* number of extents alloc'd */
48 int logflags;/* flags for transaction logging */
49
50 xfs_extlen_t total; /* total blocks needed for xaction */
51 xfs_extlen_t minlen; /* minimum allocation size (blocks) */
52 xfs_extlen_t minleft; /* amount must be left after alloc */
53 bool eof; /* set if allocating past last extent */
54 bool wasdel; /* replacing a delayed allocation */
55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */
58 int flags;
59};
60
61/*
31 * List of extents to be free "later". 62 * List of extents to be free "later".
32 * The list is kept sorted on xbf_startblock. 63 * The list is kept sorted on xbf_startblock.
33 */ 64 */
@@ -149,6 +180,8 @@ void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
149void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, 180void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
150 struct xfs_bmap_free *flist, struct xfs_mount *mp); 181 struct xfs_bmap_free *flist, struct xfs_mount *mp);
151void xfs_bmap_cancel(struct xfs_bmap_free *flist); 182void xfs_bmap_cancel(struct xfs_bmap_free *flist);
183int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
184 int *committed);
152void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); 185void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
153int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, 186int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
154 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); 187 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index fbd6da263571..8eb718979383 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -151,10 +151,13 @@ typedef struct xfs_sb {
151 __uint32_t sb_features2; /* additional feature bits */ 151 __uint32_t sb_features2; /* additional feature bits */
152 152
153 /* 153 /*
154 * bad features2 field as a result of failing to pad the sb 154 * bad features2 field as a result of failing to pad the sb structure to
155 * structure to 64 bits. Some machines will be using this field 155 * 64 bits. Some machines will be using this field for features2 bits.
156 * for features2 bits. Easiest just to mark it bad and not use 156 * Easiest just to mark it bad and not use it for anything else.
157 * it for anything else. 157 *
158 * This is not kept up to date in memory; it is always overwritten by
159 * the value in sb_features2 when formatting the incore superblock to
160 * the disk buffer.
158 */ 161 */
159 __uint32_t sb_bad_features2; 162 __uint32_t sb_bad_features2;
160 163
@@ -304,8 +307,8 @@ typedef enum {
304#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) 307#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
305#define XFS_SB_IFREE XFS_SB_MVAL(IFREE) 308#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
306#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) 309#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
307#define XFS_SB_FEATURES2 XFS_SB_MVAL(FEATURES2) 310#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
308#define XFS_SB_BAD_FEATURES2 XFS_SB_MVAL(BAD_FEATURES2) 311 XFS_SB_MVAL(BAD_FEATURES2))
309#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) 312#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
310#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) 313#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
311#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) 314#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
@@ -319,9 +322,9 @@ typedef enum {
319 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ 322 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
320 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ 323 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
321 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ 324 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
322 XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \ 325 XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
323 XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \ 326 XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
324 XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO) 327 XFS_SB_PQUOTINO)
325 328
326 329
327/* 330/*
@@ -453,13 +456,11 @@ static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)
453{ 456{
454 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; 457 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
455 sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; 458 sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
456 sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT;
457} 459}
458 460
459static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp) 461static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)
460{ 462{
461 sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; 463 sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
462 sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
463 if (!sbp->sb_features2) 464 if (!sbp->sb_features2)
464 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; 465 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
465} 466}
@@ -475,7 +476,6 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
475{ 476{
476 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; 477 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
477 sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; 478 sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
478 sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT;
479} 479}
480 480
481/* 481/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 18dc721ca19f..18dc721ca19f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 752915fa775a..b0a5fe95a3e2 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -40,69 +40,6 @@
40 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 40 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
41 */ 41 */
42 42
43static const struct {
44 short offset;
45 short type; /* 0 = integer
46 * 1 = binary / string (no translation)
47 */
48} xfs_sb_info[] = {
49 { offsetof(xfs_sb_t, sb_magicnum), 0 },
50 { offsetof(xfs_sb_t, sb_blocksize), 0 },
51 { offsetof(xfs_sb_t, sb_dblocks), 0 },
52 { offsetof(xfs_sb_t, sb_rblocks), 0 },
53 { offsetof(xfs_sb_t, sb_rextents), 0 },
54 { offsetof(xfs_sb_t, sb_uuid), 1 },
55 { offsetof(xfs_sb_t, sb_logstart), 0 },
56 { offsetof(xfs_sb_t, sb_rootino), 0 },
57 { offsetof(xfs_sb_t, sb_rbmino), 0 },
58 { offsetof(xfs_sb_t, sb_rsumino), 0 },
59 { offsetof(xfs_sb_t, sb_rextsize), 0 },
60 { offsetof(xfs_sb_t, sb_agblocks), 0 },
61 { offsetof(xfs_sb_t, sb_agcount), 0 },
62 { offsetof(xfs_sb_t, sb_rbmblocks), 0 },
63 { offsetof(xfs_sb_t, sb_logblocks), 0 },
64 { offsetof(xfs_sb_t, sb_versionnum), 0 },
65 { offsetof(xfs_sb_t, sb_sectsize), 0 },
66 { offsetof(xfs_sb_t, sb_inodesize), 0 },
67 { offsetof(xfs_sb_t, sb_inopblock), 0 },
68 { offsetof(xfs_sb_t, sb_fname[0]), 1 },
69 { offsetof(xfs_sb_t, sb_blocklog), 0 },
70 { offsetof(xfs_sb_t, sb_sectlog), 0 },
71 { offsetof(xfs_sb_t, sb_inodelog), 0 },
72 { offsetof(xfs_sb_t, sb_inopblog), 0 },
73 { offsetof(xfs_sb_t, sb_agblklog), 0 },
74 { offsetof(xfs_sb_t, sb_rextslog), 0 },
75 { offsetof(xfs_sb_t, sb_inprogress), 0 },
76 { offsetof(xfs_sb_t, sb_imax_pct), 0 },
77 { offsetof(xfs_sb_t, sb_icount), 0 },
78 { offsetof(xfs_sb_t, sb_ifree), 0 },
79 { offsetof(xfs_sb_t, sb_fdblocks), 0 },
80 { offsetof(xfs_sb_t, sb_frextents), 0 },
81 { offsetof(xfs_sb_t, sb_uquotino), 0 },
82 { offsetof(xfs_sb_t, sb_gquotino), 0 },
83 { offsetof(xfs_sb_t, sb_qflags), 0 },
84 { offsetof(xfs_sb_t, sb_flags), 0 },
85 { offsetof(xfs_sb_t, sb_shared_vn), 0 },
86 { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
87 { offsetof(xfs_sb_t, sb_unit), 0 },
88 { offsetof(xfs_sb_t, sb_width), 0 },
89 { offsetof(xfs_sb_t, sb_dirblklog), 0 },
90 { offsetof(xfs_sb_t, sb_logsectlog), 0 },
91 { offsetof(xfs_sb_t, sb_logsectsize), 0 },
92 { offsetof(xfs_sb_t, sb_logsunit), 0 },
93 { offsetof(xfs_sb_t, sb_features2), 0 },
94 { offsetof(xfs_sb_t, sb_bad_features2), 0 },
95 { offsetof(xfs_sb_t, sb_features_compat), 0 },
96 { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
97 { offsetof(xfs_sb_t, sb_features_incompat), 0 },
98 { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
99 { offsetof(xfs_sb_t, sb_crc), 0 },
100 { offsetof(xfs_sb_t, sb_pad), 0 },
101 { offsetof(xfs_sb_t, sb_pquotino), 0 },
102 { offsetof(xfs_sb_t, sb_lsn), 0 },
103 { sizeof(xfs_sb_t), 0 }
104};
105
106/* 43/*
107 * Reference counting access wrappers to the perag structures. 44 * Reference counting access wrappers to the perag structures.
108 * Because we never free per-ag structures, the only thing we 45 * Because we never free per-ag structures, the only thing we
@@ -461,58 +398,49 @@ xfs_sb_from_disk(
461 __xfs_sb_from_disk(to, from, true); 398 __xfs_sb_from_disk(to, from, true);
462} 399}
463 400
464static inline void 401static void
465xfs_sb_quota_to_disk( 402xfs_sb_quota_to_disk(
466 xfs_dsb_t *to, 403 struct xfs_dsb *to,
467 xfs_sb_t *from, 404 struct xfs_sb *from)
468 __int64_t *fields)
469{ 405{
470 __uint16_t qflags = from->sb_qflags; 406 __uint16_t qflags = from->sb_qflags;
471 407
408 to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
409 if (xfs_sb_version_has_pquotino(from)) {
410 to->sb_qflags = cpu_to_be16(from->sb_qflags);
411 to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
412 to->sb_pquotino = cpu_to_be64(from->sb_pquotino);
413 return;
414 }
415
472 /* 416 /*
473 * We need to do these manipilations only if we are working 417 * The in-core version of sb_qflags do not have XFS_OQUOTA_*
474 * with an older version of on-disk superblock. 418 * flags, whereas the on-disk version does. So, convert incore
419 * XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags.
475 */ 420 */
476 if (xfs_sb_version_has_pquotino(from)) 421 qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
477 return; 422 XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
478 423
479 if (*fields & XFS_SB_QFLAGS) { 424 if (from->sb_qflags &
480 /* 425 (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
481 * The in-core version of sb_qflags do not have 426 qflags |= XFS_OQUOTA_ENFD;
482 * XFS_OQUOTA_* flags, whereas the on-disk version 427 if (from->sb_qflags &
483 * does. So, convert incore XFS_{PG}QUOTA_* flags 428 (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
484 * to on-disk XFS_OQUOTA_* flags. 429 qflags |= XFS_OQUOTA_CHKD;
485 */ 430 to->sb_qflags = cpu_to_be16(qflags);
486 qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
487 XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
488
489 if (from->sb_qflags &
490 (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
491 qflags |= XFS_OQUOTA_ENFD;
492 if (from->sb_qflags &
493 (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
494 qflags |= XFS_OQUOTA_CHKD;
495 to->sb_qflags = cpu_to_be16(qflags);
496 *fields &= ~XFS_SB_QFLAGS;
497 }
498 431
499 /* 432 /*
500 * GQUOTINO and PQUOTINO cannot be used together in versions of 433 * GQUOTINO and PQUOTINO cannot be used together in versions
501 * superblock that do not have pquotino. from->sb_flags tells us which 434 * of superblock that do not have pquotino. from->sb_flags
502 * quota is active and should be copied to disk. If neither are active, 435 * tells us which quota is active and should be copied to
503 * make sure we write NULLFSINO to the sb_gquotino field as a quota 436 * disk. If neither are active, we should NULL the inode.
504 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
505 * bit is set.
506 * 437 *
507 * Note that we don't need to handle the sb_uquotino or sb_pquotino here 438 * In all cases, the separate pquotino must remain 0 because it
508 * as they do not require any translation. Hence the main sb field loop 439 * it beyond the "end" of the valid non-pquotino superblock.
509 * will write them appropriately from the in-core superblock.
510 */ 440 */
511 if ((*fields & XFS_SB_GQUOTINO) && 441 if (from->sb_qflags & XFS_GQUOTA_ACCT)
512 (from->sb_qflags & XFS_GQUOTA_ACCT))
513 to->sb_gquotino = cpu_to_be64(from->sb_gquotino); 442 to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
514 else if ((*fields & XFS_SB_PQUOTINO) && 443 else if (from->sb_qflags & XFS_PQUOTA_ACCT)
515 (from->sb_qflags & XFS_PQUOTA_ACCT))
516 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 444 to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
517 else { 445 else {
518 /* 446 /*
@@ -526,63 +454,78 @@ xfs_sb_quota_to_disk(
526 to->sb_gquotino = cpu_to_be64(NULLFSINO); 454 to->sb_gquotino = cpu_to_be64(NULLFSINO);
527 } 455 }
528 456
529 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); 457 to->sb_pquotino = 0;
530} 458}
531 459
532/*
533 * Copy in core superblock to ondisk one.
534 *
535 * The fields argument is mask of superblock fields to copy.
536 */
537void 460void
538xfs_sb_to_disk( 461xfs_sb_to_disk(
539 xfs_dsb_t *to, 462 struct xfs_dsb *to,
540 xfs_sb_t *from, 463 struct xfs_sb *from)
541 __int64_t fields)
542{ 464{
543 xfs_caddr_t to_ptr = (xfs_caddr_t)to; 465 xfs_sb_quota_to_disk(to, from);
544 xfs_caddr_t from_ptr = (xfs_caddr_t)from;
545 xfs_sb_field_t f;
546 int first;
547 int size;
548
549 ASSERT(fields);
550 if (!fields)
551 return;
552 466
553 /* We should never write the crc here, it's updated in the IO path */ 467 to->sb_magicnum = cpu_to_be32(from->sb_magicnum);
554 fields &= ~XFS_SB_CRC; 468 to->sb_blocksize = cpu_to_be32(from->sb_blocksize);
555 469 to->sb_dblocks = cpu_to_be64(from->sb_dblocks);
556 xfs_sb_quota_to_disk(to, from, &fields); 470 to->sb_rblocks = cpu_to_be64(from->sb_rblocks);
557 while (fields) { 471 to->sb_rextents = cpu_to_be64(from->sb_rextents);
558 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 472 memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
559 first = xfs_sb_info[f].offset; 473 to->sb_logstart = cpu_to_be64(from->sb_logstart);
560 size = xfs_sb_info[f + 1].offset - first; 474 to->sb_rootino = cpu_to_be64(from->sb_rootino);
561 475 to->sb_rbmino = cpu_to_be64(from->sb_rbmino);
562 ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); 476 to->sb_rsumino = cpu_to_be64(from->sb_rsumino);
563 477 to->sb_rextsize = cpu_to_be32(from->sb_rextsize);
564 if (size == 1 || xfs_sb_info[f].type == 1) { 478 to->sb_agblocks = cpu_to_be32(from->sb_agblocks);
565 memcpy(to_ptr + first, from_ptr + first, size); 479 to->sb_agcount = cpu_to_be32(from->sb_agcount);
566 } else { 480 to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks);
567 switch (size) { 481 to->sb_logblocks = cpu_to_be32(from->sb_logblocks);
568 case 2: 482 to->sb_versionnum = cpu_to_be16(from->sb_versionnum);
569 *(__be16 *)(to_ptr + first) = 483 to->sb_sectsize = cpu_to_be16(from->sb_sectsize);
570 cpu_to_be16(*(__u16 *)(from_ptr + first)); 484 to->sb_inodesize = cpu_to_be16(from->sb_inodesize);
571 break; 485 to->sb_inopblock = cpu_to_be16(from->sb_inopblock);
572 case 4: 486 memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
573 *(__be32 *)(to_ptr + first) = 487 to->sb_blocklog = from->sb_blocklog;
574 cpu_to_be32(*(__u32 *)(from_ptr + first)); 488 to->sb_sectlog = from->sb_sectlog;
575 break; 489 to->sb_inodelog = from->sb_inodelog;
576 case 8: 490 to->sb_inopblog = from->sb_inopblog;
577 *(__be64 *)(to_ptr + first) = 491 to->sb_agblklog = from->sb_agblklog;
578 cpu_to_be64(*(__u64 *)(from_ptr + first)); 492 to->sb_rextslog = from->sb_rextslog;
579 break; 493 to->sb_inprogress = from->sb_inprogress;
580 default: 494 to->sb_imax_pct = from->sb_imax_pct;
581 ASSERT(0); 495 to->sb_icount = cpu_to_be64(from->sb_icount);
582 } 496 to->sb_ifree = cpu_to_be64(from->sb_ifree);
583 } 497 to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks);
498 to->sb_frextents = cpu_to_be64(from->sb_frextents);
584 499
585 fields &= ~(1LL << f); 500 to->sb_flags = from->sb_flags;
501 to->sb_shared_vn = from->sb_shared_vn;
502 to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt);
503 to->sb_unit = cpu_to_be32(from->sb_unit);
504 to->sb_width = cpu_to_be32(from->sb_width);
505 to->sb_dirblklog = from->sb_dirblklog;
506 to->sb_logsectlog = from->sb_logsectlog;
507 to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize);
508 to->sb_logsunit = cpu_to_be32(from->sb_logsunit);
509
510 /*
511 * We need to ensure that bad_features2 always matches features2.
512 * Hence we enforce that here rather than having to remember to do it
513 * everywhere else that updates features2.
514 */
515 from->sb_bad_features2 = from->sb_features2;
516 to->sb_features2 = cpu_to_be32(from->sb_features2);
517 to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2);
518
519 if (xfs_sb_version_hascrc(from)) {
520 to->sb_features_compat = cpu_to_be32(from->sb_features_compat);
521 to->sb_features_ro_compat =
522 cpu_to_be32(from->sb_features_ro_compat);
523 to->sb_features_incompat =
524 cpu_to_be32(from->sb_features_incompat);
525 to->sb_features_log_incompat =
526 cpu_to_be32(from->sb_features_log_incompat);
527 to->sb_pad = 0;
528 to->sb_lsn = cpu_to_be64(from->sb_lsn);
586 } 529 }
587} 530}
588 531
@@ -816,42 +759,51 @@ xfs_initialize_perag_data(
816} 759}
817 760
818/* 761/*
819 * xfs_mod_sb() can be used to copy arbitrary changes to the 762 * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock
820 * in-core superblock into the superblock buffer to be logged. 763 * into the superblock buffer to be logged. It does not provide the higher
821 * It does not provide the higher level of locking that is 764 * level of locking that is needed to protect the in-core superblock from
822 * needed to protect the in-core superblock from concurrent 765 * concurrent access.
823 * access.
824 */ 766 */
825void 767void
826xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) 768xfs_log_sb(
769 struct xfs_trans *tp)
827{ 770{
828 xfs_buf_t *bp; 771 struct xfs_mount *mp = tp->t_mountp;
829 int first; 772 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
830 int last;
831 xfs_mount_t *mp;
832 xfs_sb_field_t f;
833
834 ASSERT(fields);
835 if (!fields)
836 return;
837 mp = tp->t_mountp;
838 bp = xfs_trans_getsb(tp, mp, 0);
839 first = sizeof(xfs_sb_t);
840 last = 0;
841
842 /* translate/copy */
843 773
844 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 774 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
775 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
776 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
777}
845 778
846 /* find modified range */ 779/*
847 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); 780 * xfs_sync_sb
848 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 781 *
849 last = xfs_sb_info[f + 1].offset - 1; 782 * Sync the superblock to disk.
783 *
784 * Note that the caller is responsible for checking the frozen state of the
785 * filesystem. This procedure uses the non-blocking transaction allocator and
786 * thus will allow modifications to a frozen fs. This is required because this
787 * code can be called during the process of freezing where use of the high-level
788 * allocator would deadlock.
789 */
790int
791xfs_sync_sb(
792 struct xfs_mount *mp,
793 bool wait)
794{
795 struct xfs_trans *tp;
796 int error;
850 797
851 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 798 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
852 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 799 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
853 first = xfs_sb_info[f].offset; 800 if (error) {
801 xfs_trans_cancel(tp, 0);
802 return error;
803 }
854 804
855 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); 805 xfs_log_sb(tp);
856 xfs_trans_log_buf(tp, bp, first, last); 806 if (wait)
807 xfs_trans_set_sync(tp);
808 return xfs_trans_commit(tp, 0);
857} 809}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 8eb1c54bafbf..b25bb9a343f3 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -27,11 +27,12 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
27extern void xfs_perag_put(struct xfs_perag *pag); 27extern void xfs_perag_put(struct xfs_perag *pag);
28extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); 28extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
29 29
30extern void xfs_sb_calc_crc(struct xfs_buf *); 30extern void xfs_sb_calc_crc(struct xfs_buf *bp);
31extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 31extern void xfs_log_sb(struct xfs_trans *tp);
32extern void xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *); 32extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
33extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 33extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
34extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 34extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
35extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
35extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); 36extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
36 37
37#endif /* __XFS_SB_H__ */ 38#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 82404da2ca67..8dda4b321343 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -82,7 +82,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
82#define XFS_TRANS_ATTR_RM 23 82#define XFS_TRANS_ATTR_RM 23
83#define XFS_TRANS_ATTR_FLAG 24 83#define XFS_TRANS_ATTR_FLAG 24
84#define XFS_TRANS_CLEAR_AGI_BUCKET 25 84#define XFS_TRANS_CLEAR_AGI_BUCKET 25
85#define XFS_TRANS_QM_SBCHANGE 26 85#define XFS_TRANS_SB_CHANGE 26
86/* 86/*
87 * Dummy entries since we use the transaction type to index into the 87 * Dummy entries since we use the transaction type to index into the
88 * trans_type[] in xlog_recover_print_trans_head() 88 * trans_type[] in xlog_recover_print_trans_head()
@@ -95,17 +95,15 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
95#define XFS_TRANS_QM_DQCLUSTER 32 95#define XFS_TRANS_QM_DQCLUSTER 32
96#define XFS_TRANS_QM_QINOCREATE 33 96#define XFS_TRANS_QM_QINOCREATE 33
97#define XFS_TRANS_QM_QUOTAOFF_END 34 97#define XFS_TRANS_QM_QUOTAOFF_END 34
98#define XFS_TRANS_SB_UNIT 35 98#define XFS_TRANS_FSYNC_TS 35
99#define XFS_TRANS_FSYNC_TS 36 99#define XFS_TRANS_GROWFSRT_ALLOC 36
100#define XFS_TRANS_GROWFSRT_ALLOC 37 100#define XFS_TRANS_GROWFSRT_ZERO 37
101#define XFS_TRANS_GROWFSRT_ZERO 38 101#define XFS_TRANS_GROWFSRT_FREE 38
102#define XFS_TRANS_GROWFSRT_FREE 39 102#define XFS_TRANS_SWAPEXT 39
103#define XFS_TRANS_SWAPEXT 40 103#define XFS_TRANS_CHECKPOINT 40
104#define XFS_TRANS_SB_COUNT 41 104#define XFS_TRANS_ICREATE 41
105#define XFS_TRANS_CHECKPOINT 42 105#define XFS_TRANS_CREATE_TMPFILE 42
106#define XFS_TRANS_ICREATE 43 106#define XFS_TRANS_TYPE_MAX 43
107#define XFS_TRANS_CREATE_TMPFILE 44
108#define XFS_TRANS_TYPE_MAX 44
109/* new transaction types need to be reflected in xfs_logprint(8) */ 107/* new transaction types need to be reflected in xfs_logprint(8) */
110 108
111#define XFS_TRANS_TYPES \ 109#define XFS_TRANS_TYPES \
@@ -113,7 +111,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
113 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ 111 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
114 { XFS_TRANS_INACTIVE, "INACTIVE" }, \ 112 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
115 { XFS_TRANS_CREATE, "CREATE" }, \ 113 { XFS_TRANS_CREATE, "CREATE" }, \
116 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
117 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ 114 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
118 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ 115 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
119 { XFS_TRANS_REMOVE, "REMOVE" }, \ 116 { XFS_TRANS_REMOVE, "REMOVE" }, \
@@ -134,23 +131,23 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
134 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ 131 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
135 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ 132 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
136 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ 133 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
137 { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ 134 { XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \
135 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
136 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
138 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ 137 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
139 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ 138 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
140 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ 139 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
141 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ 140 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
142 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ 141 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
143 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ 142 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
144 { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
145 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ 143 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
146 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ 144 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
147 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ 145 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
148 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ 146 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
149 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ 147 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
150 { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
151 { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ 148 { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
152 { XFS_TRANS_DUMMY1, "DUMMY1" }, \ 149 { XFS_TRANS_ICREATE, "ICREATE" }, \
153 { XFS_TRANS_DUMMY2, "DUMMY2" }, \ 150 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
154 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } 151 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
155 152
156/* 153/*
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index c80c5236c3da..e7e26bd6468f 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -178,6 +178,8 @@ xfs_symlink_local_to_remote(
178 struct xfs_mount *mp = ip->i_mount; 178 struct xfs_mount *mp = ip->i_mount;
179 char *buf; 179 char *buf;
180 180
181 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
182
181 if (!xfs_sb_version_hascrc(&mp->m_sb)) { 183 if (!xfs_sb_version_hascrc(&mp->m_sb)) {
182 bp->b_ops = NULL; 184 bp->b_ops = NULL;
183 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 185 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6c1330f29050..68cb1e7bf2bb 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -716,17 +716,6 @@ xfs_calc_clear_agi_bucket_reservation(
716} 716}
717 717
718/* 718/*
719 * Clearing the quotaflags in the superblock.
720 * the super block for changing quota flags: sector size
721 */
722STATIC uint
723xfs_calc_qm_sbchange_reservation(
724 struct xfs_mount *mp)
725{
726 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
727}
728
729/*
730 * Adjusting quota limits. 719 * Adjusting quota limits.
731 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) 720 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
732 */ 721 */
@@ -864,9 +853,6 @@ xfs_trans_resv_calc(
864 * The following transactions are logged in logical format with 853 * The following transactions are logged in logical format with
865 * a default log count. 854 * a default log count.
866 */ 855 */
867 resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp);
868 resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT;
869
870 resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp); 856 resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
871 resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT; 857 resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
872 858
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 1097d14cd583..2d5bdfce6d8f 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -56,7 +56,6 @@ struct xfs_trans_resv {
56 struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */ 56 struct xfs_trans_res tr_growrtalloc; /* grow realtime allocations */
57 struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */ 57 struct xfs_trans_res tr_growrtzero; /* grow realtime zeroing */
58 struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */ 58 struct xfs_trans_res tr_growrtfree; /* grow realtime freeing */
59 struct xfs_trans_res tr_qm_sbchange; /* change quota flags */
60 struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */ 59 struct xfs_trans_res tr_qm_setqlim; /* adjust quota limits */
61 struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ 60 struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */
62 struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */ 61 struct xfs_trans_res tr_qm_quotaoff; /* turn quota off */
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index b79dc66b2ecd..b79dc66b2ecd 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 18e2f3bbae5e..3a9b7a1b8704 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc(
135 */ 135 */
136STATIC int 136STATIC int
137xfs_setfilesize( 137xfs_setfilesize(
138 struct xfs_ioend *ioend) 138 struct xfs_inode *ip,
139 struct xfs_trans *tp,
140 xfs_off_t offset,
141 size_t size)
139{ 142{
140 struct xfs_inode *ip = XFS_I(ioend->io_inode);
141 struct xfs_trans *tp = ioend->io_append_trans;
142 xfs_fsize_t isize; 143 xfs_fsize_t isize;
143 144
144 /*
145 * The transaction may have been allocated in the I/O submission thread,
146 * thus we need to mark ourselves as beeing in a transaction manually.
147 * Similarly for freeze protection.
148 */
149 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
150 rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
151 0, 1, _THIS_IP_);
152
153 xfs_ilock(ip, XFS_ILOCK_EXCL); 145 xfs_ilock(ip, XFS_ILOCK_EXCL);
154 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); 146 isize = xfs_new_eof(ip, offset + size);
155 if (!isize) { 147 if (!isize) {
156 xfs_iunlock(ip, XFS_ILOCK_EXCL); 148 xfs_iunlock(ip, XFS_ILOCK_EXCL);
157 xfs_trans_cancel(tp, 0); 149 xfs_trans_cancel(tp, 0);
158 return 0; 150 return 0;
159 } 151 }
160 152
161 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 153 trace_xfs_setfilesize(ip, offset, size);
162 154
163 ip->i_d.di_size = isize; 155 ip->i_d.di_size = isize;
164 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 156 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -167,6 +159,25 @@ xfs_setfilesize(
167 return xfs_trans_commit(tp, 0); 159 return xfs_trans_commit(tp, 0);
168} 160}
169 161
162STATIC int
163xfs_setfilesize_ioend(
164 struct xfs_ioend *ioend)
165{
166 struct xfs_inode *ip = XFS_I(ioend->io_inode);
167 struct xfs_trans *tp = ioend->io_append_trans;
168
169 /*
170 * The transaction may have been allocated in the I/O submission thread,
171 * thus we need to mark ourselves as being in a transaction manually.
172 * Similarly for freeze protection.
173 */
174 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
175 rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
176 0, 1, _THIS_IP_);
177
178 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
179}
180
170/* 181/*
171 * Schedule IO completion handling on the final put of an ioend. 182 * Schedule IO completion handling on the final put of an ioend.
172 * 183 *
@@ -182,8 +193,7 @@ xfs_finish_ioend(
182 193
183 if (ioend->io_type == XFS_IO_UNWRITTEN) 194 if (ioend->io_type == XFS_IO_UNWRITTEN)
184 queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 195 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
185 else if (ioend->io_append_trans || 196 else if (ioend->io_append_trans)
186 (ioend->io_isdirect && xfs_ioend_is_append(ioend)))
187 queue_work(mp->m_data_workqueue, &ioend->io_work); 197 queue_work(mp->m_data_workqueue, &ioend->io_work);
188 else 198 else
189 xfs_destroy_ioend(ioend); 199 xfs_destroy_ioend(ioend);
@@ -215,22 +225,8 @@ xfs_end_io(
215 if (ioend->io_type == XFS_IO_UNWRITTEN) { 225 if (ioend->io_type == XFS_IO_UNWRITTEN) {
216 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 226 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
217 ioend->io_size); 227 ioend->io_size);
218 } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
219 /*
220 * For direct I/O we do not know if we need to allocate blocks
221 * or not so we can't preallocate an append transaction as that
222 * results in nested reservations and log space deadlocks. Hence
223 * allocate the transaction here. While this is sub-optimal and
224 * can block IO completion for some time, we're stuck with doing
225 * it this way until we can pass the ioend to the direct IO
226 * allocation callbacks and avoid nesting that way.
227 */
228 error = xfs_setfilesize_trans_alloc(ioend);
229 if (error)
230 goto done;
231 error = xfs_setfilesize(ioend);
232 } else if (ioend->io_append_trans) { 228 } else if (ioend->io_append_trans) {
233 error = xfs_setfilesize(ioend); 229 error = xfs_setfilesize_ioend(ioend);
234 } else { 230 } else {
235 ASSERT(!xfs_ioend_is_append(ioend)); 231 ASSERT(!xfs_ioend_is_append(ioend));
236 } 232 }
@@ -242,17 +238,6 @@ done:
242} 238}
243 239
244/* 240/*
245 * Call IO completion handling in caller context on the final put of an ioend.
246 */
247STATIC void
248xfs_finish_ioend_sync(
249 struct xfs_ioend *ioend)
250{
251 if (atomic_dec_and_test(&ioend->io_remaining))
252 xfs_end_io(&ioend->io_work);
253}
254
255/*
256 * Allocate and initialise an IO completion structure. 241 * Allocate and initialise an IO completion structure.
257 * We need to track unwritten extent write completion here initially. 242 * We need to track unwritten extent write completion here initially.
258 * We'll need to extend this for updating the ondisk inode size later 243 * We'll need to extend this for updating the ondisk inode size later
@@ -273,7 +258,6 @@ xfs_alloc_ioend(
273 * all the I/O from calling the completion routine too early. 258 * all the I/O from calling the completion routine too early.
274 */ 259 */
275 atomic_set(&ioend->io_remaining, 1); 260 atomic_set(&ioend->io_remaining, 1);
276 ioend->io_isdirect = 0;
277 ioend->io_error = 0; 261 ioend->io_error = 0;
278 ioend->io_list = NULL; 262 ioend->io_list = NULL;
279 ioend->io_type = type; 263 ioend->io_type = type;
@@ -1459,11 +1443,7 @@ xfs_get_blocks_direct(
1459 * 1443 *
1460 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1444 * If the private argument is non-NULL __xfs_get_blocks signals us that we
1461 * need to issue a transaction to convert the range from unwritten to written 1445 * need to issue a transaction to convert the range from unwritten to written
1462 * extents. In case this is regular synchronous I/O we just call xfs_end_io 1446 * extents.
1463 * to do this and we are done. But in case this was a successful AIO
1464 * request this handler is called from interrupt context, from which we
1465 * can't start transactions. In that case offload the I/O completion to
1466 * the workqueues we also use for buffered I/O completion.
1467 */ 1447 */
1468STATIC void 1448STATIC void
1469xfs_end_io_direct_write( 1449xfs_end_io_direct_write(
@@ -1472,7 +1452,12 @@ xfs_end_io_direct_write(
1472 ssize_t size, 1452 ssize_t size,
1473 void *private) 1453 void *private)
1474{ 1454{
1475 struct xfs_ioend *ioend = iocb->private; 1455 struct inode *inode = file_inode(iocb->ki_filp);
1456 struct xfs_inode *ip = XFS_I(inode);
1457 struct xfs_mount *mp = ip->i_mount;
1458
1459 if (XFS_FORCED_SHUTDOWN(mp))
1460 return;
1476 1461
1477 /* 1462 /*
1478 * While the generic direct I/O code updates the inode size, it does 1463 * While the generic direct I/O code updates the inode size, it does
@@ -1480,22 +1465,33 @@ xfs_end_io_direct_write(
1480 * end_io handler thinks the on-disk size is outside the in-core 1465 * end_io handler thinks the on-disk size is outside the in-core
1481 * size. To prevent this just update it a little bit earlier here. 1466 * size. To prevent this just update it a little bit earlier here.
1482 */ 1467 */
1483 if (offset + size > i_size_read(ioend->io_inode)) 1468 if (offset + size > i_size_read(inode))
1484 i_size_write(ioend->io_inode, offset + size); 1469 i_size_write(inode, offset + size);
1485 1470
1486 /* 1471 /*
1487 * blockdev_direct_IO can return an error even after the I/O 1472 * For direct I/O we do not know if we need to allocate blocks or not,
1488 * completion handler was called. Thus we need to protect 1473 * so we can't preallocate an append transaction, as that results in
1489 * against double-freeing. 1474 * nested reservations and log space deadlocks. Hence allocate the
1475 * transaction here. While this is sub-optimal and can block IO
1476 * completion for some time, we're stuck with doing it this way until
1477 * we can pass the ioend to the direct IO allocation callbacks and
1478 * avoid nesting that way.
1490 */ 1479 */
1491 iocb->private = NULL; 1480 if (private && size > 0) {
1492 1481 xfs_iomap_write_unwritten(ip, offset, size);
1493 ioend->io_offset = offset; 1482 } else if (offset + size > ip->i_d.di_size) {
1494 ioend->io_size = size; 1483 struct xfs_trans *tp;
1495 if (private && size > 0) 1484 int error;
1496 ioend->io_type = XFS_IO_UNWRITTEN; 1485
1486 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1487 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
1488 if (error) {
1489 xfs_trans_cancel(tp, 0);
1490 return;
1491 }
1497 1492
1498 xfs_finish_ioend_sync(ioend); 1493 xfs_setfilesize(ip, tp, offset, size);
1494 }
1499} 1495}
1500 1496
1501STATIC ssize_t 1497STATIC ssize_t
@@ -1507,39 +1503,16 @@ xfs_vm_direct_IO(
1507{ 1503{
1508 struct inode *inode = iocb->ki_filp->f_mapping->host; 1504 struct inode *inode = iocb->ki_filp->f_mapping->host;
1509 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1505 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1510 struct xfs_ioend *ioend = NULL;
1511 ssize_t ret;
1512 1506
1513 if (rw & WRITE) { 1507 if (rw & WRITE) {
1514 size_t size = iov_iter_count(iter); 1508 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1515
1516 /*
1517 * We cannot preallocate a size update transaction here as we
1518 * don't know whether allocation is necessary or not. Hence we
1519 * can only tell IO completion that one is necessary if we are
1520 * not doing unwritten extent conversion.
1521 */
1522 iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
1523 if (offset + size > XFS_I(inode)->i_d.di_size)
1524 ioend->io_isdirect = 1;
1525
1526 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1527 offset, xfs_get_blocks_direct, 1509 offset, xfs_get_blocks_direct,
1528 xfs_end_io_direct_write, NULL, 1510 xfs_end_io_direct_write, NULL,
1529 DIO_ASYNC_EXTEND); 1511 DIO_ASYNC_EXTEND);
1530 if (ret != -EIOCBQUEUED && iocb->private)
1531 goto out_destroy_ioend;
1532 } else {
1533 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1534 offset, xfs_get_blocks_direct,
1535 NULL, NULL, 0);
1536 } 1512 }
1537 1513 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
1538 return ret; 1514 offset, xfs_get_blocks_direct,
1539 1515 NULL, NULL, 0);
1540out_destroy_ioend:
1541 xfs_destroy_ioend(ioend);
1542 return ret;
1543} 1516}
1544 1517
1545/* 1518/*
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f94dd459dff9..ac644e0137a4 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool;
24 * Types of I/O for bmap clustering and I/O completion tracking. 24 * Types of I/O for bmap clustering and I/O completion tracking.
25 */ 25 */
26enum { 26enum {
27 XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */
28 XFS_IO_DELALLOC, /* covers delalloc region */ 27 XFS_IO_DELALLOC, /* covers delalloc region */
29 XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ 28 XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
30 XFS_IO_OVERWRITE, /* covers already allocated extent */ 29 XFS_IO_OVERWRITE, /* covers already allocated extent */
31}; 30};
32 31
33#define XFS_IO_TYPES \ 32#define XFS_IO_TYPES \
34 { 0, "" }, \
35 { XFS_IO_DELALLOC, "delalloc" }, \ 33 { XFS_IO_DELALLOC, "delalloc" }, \
36 { XFS_IO_UNWRITTEN, "unwritten" }, \ 34 { XFS_IO_UNWRITTEN, "unwritten" }, \
37 { XFS_IO_OVERWRITE, "overwrite" } 35 { XFS_IO_OVERWRITE, "overwrite" }
@@ -45,7 +43,6 @@ typedef struct xfs_ioend {
45 unsigned int io_type; /* delalloc / unwritten */ 43 unsigned int io_type; /* delalloc / unwritten */
46 int io_error; /* I/O error code */ 44 int io_error; /* I/O error code */
47 atomic_t io_remaining; /* hold count */ 45 atomic_t io_remaining; /* hold count */
48 unsigned int io_isdirect : 1;/* direct I/O */
49 struct inode *io_inode; /* file being written to */ 46 struct inode *io_inode; /* file being written to */
50 struct buffer_head *io_buffer_head;/* buffer linked list head */ 47 struct buffer_head *io_buffer_head;/* buffer linked list head */
51 struct buffer_head *io_buffer_tail;/* buffer linked list tail */ 48 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 2fdb72d2c908..736429a72a12 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -26,43 +26,8 @@ struct xfs_ifork;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_mount; 27struct xfs_mount;
28struct xfs_trans; 28struct xfs_trans;
29struct xfs_bmalloca;
29 30
30/*
31 * Argument structure for xfs_bmap_alloc.
32 */
33struct xfs_bmalloca {
34 xfs_fsblock_t *firstblock; /* i/o first block allocated */
35 struct xfs_bmap_free *flist; /* bmap freelist */
36 struct xfs_trans *tp; /* transaction pointer */
37 struct xfs_inode *ip; /* incore inode pointer */
38 struct xfs_bmbt_irec prev; /* extent before the new one */
39 struct xfs_bmbt_irec got; /* extent after, or delayed */
40
41 xfs_fileoff_t offset; /* offset in file filling in */
42 xfs_extlen_t length; /* i/o length asked/allocated */
43 xfs_fsblock_t blkno; /* starting block of new extent */
44
45 struct xfs_btree_cur *cur; /* btree cursor */
46 xfs_extnum_t idx; /* current extent index */
47 int nallocs;/* number of extents alloc'd */
48 int logflags;/* flags for transaction logging */
49
50 xfs_extlen_t total; /* total blocks needed for xaction */
51 xfs_extlen_t minlen; /* minimum allocation size (blocks) */
52 xfs_extlen_t minleft; /* amount must be left after alloc */
53 bool eof; /* set if allocating past last extent */
54 bool wasdel; /* replacing a delayed allocation */
55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */
58 int flags;
59 struct completion *done;
60 struct work_struct work;
61 int result;
62};
63
64int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
65 int *committed);
66int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); 31int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
67int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, 32int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
68 int whichfork, int *eof); 33 int whichfork, int *eof);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3f9bd58edec7..507d96a57ac7 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -319,6 +319,10 @@ xfs_buf_item_format(
319 ASSERT(atomic_read(&bip->bli_refcount) > 0); 319 ASSERT(atomic_read(&bip->bli_refcount) > 0);
320 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 320 ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
321 (bip->bli_flags & XFS_BLI_STALE)); 321 (bip->bli_flags & XFS_BLI_STALE));
322 ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
323 (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
324 && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
325
322 326
323 /* 327 /*
324 * If it is an inode buffer, transfer the in-memory state to the 328 * If it is an inode buffer, transfer the in-memory state to the
@@ -535,7 +539,7 @@ xfs_buf_item_push(
535 if ((bp->b_flags & XBF_WRITE_FAIL) && 539 if ((bp->b_flags & XBF_WRITE_FAIL) &&
536 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { 540 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
537 xfs_warn(bp->b_target->bt_mount, 541 xfs_warn(bp->b_target->bt_mount,
538"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", 542"Detected failing async write on buffer block 0x%llx. Retrying async write.",
539 (long long)bp->b_bn); 543 (long long)bp->b_bn);
540 } 544 }
541 545
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index c24c67e22a2a..2f536f33cd26 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -86,7 +86,7 @@ static inline void xfs_dqflock(xfs_dquot_t *dqp)
86 wait_for_completion(&dqp->q_flush); 86 wait_for_completion(&dqp->q_flush);
87} 87}
88 88
89static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) 89static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp)
90{ 90{
91 return try_wait_for_completion(&dqp->q_flush); 91 return try_wait_for_completion(&dqp->q_flush);
92} 92}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 13e974e6a889..1cdba95c78cb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -127,6 +127,42 @@ xfs_iozero(
127 return (-status); 127 return (-status);
128} 128}
129 129
130int
131xfs_update_prealloc_flags(
132 struct xfs_inode *ip,
133 enum xfs_prealloc_flags flags)
134{
135 struct xfs_trans *tp;
136 int error;
137
138 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
139 error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
140 if (error) {
141 xfs_trans_cancel(tp, 0);
142 return error;
143 }
144
145 xfs_ilock(ip, XFS_ILOCK_EXCL);
146 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
147
148 if (!(flags & XFS_PREALLOC_INVISIBLE)) {
149 ip->i_d.di_mode &= ~S_ISUID;
150 if (ip->i_d.di_mode & S_IXGRP)
151 ip->i_d.di_mode &= ~S_ISGID;
152 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
153 }
154
155 if (flags & XFS_PREALLOC_SET)
156 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
157 if (flags & XFS_PREALLOC_CLEAR)
158 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
159
160 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
161 if (flags & XFS_PREALLOC_SYNC)
162 xfs_trans_set_sync(tp);
163 return xfs_trans_commit(tp, 0);
164}
165
130/* 166/*
131 * Fsync operations on directories are much simpler than on regular files, 167 * Fsync operations on directories are much simpler than on regular files,
132 * as there is no file data to flush, and thus also no need for explicit 168 * as there is no file data to flush, and thus also no need for explicit
@@ -699,7 +735,7 @@ xfs_file_buffered_aio_write(
699 735
700 iov_iter_truncate(from, count); 736 iov_iter_truncate(from, count);
701 /* We can write back this queue in page reclaim */ 737 /* We can write back this queue in page reclaim */
702 current->backing_dev_info = mapping->backing_dev_info; 738 current->backing_dev_info = inode_to_bdi(inode);
703 739
704write_retry: 740write_retry:
705 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 741 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
@@ -784,8 +820,8 @@ xfs_file_fallocate(
784{ 820{
785 struct inode *inode = file_inode(file); 821 struct inode *inode = file_inode(file);
786 struct xfs_inode *ip = XFS_I(inode); 822 struct xfs_inode *ip = XFS_I(inode);
787 struct xfs_trans *tp;
788 long error; 823 long error;
824 enum xfs_prealloc_flags flags = 0;
789 loff_t new_size = 0; 825 loff_t new_size = 0;
790 826
791 if (!S_ISREG(inode->i_mode)) 827 if (!S_ISREG(inode->i_mode))
@@ -822,6 +858,8 @@ xfs_file_fallocate(
822 if (error) 858 if (error)
823 goto out_unlock; 859 goto out_unlock;
824 } else { 860 } else {
861 flags |= XFS_PREALLOC_SET;
862
825 if (!(mode & FALLOC_FL_KEEP_SIZE) && 863 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
826 offset + len > i_size_read(inode)) { 864 offset + len > i_size_read(inode)) {
827 new_size = offset + len; 865 new_size = offset + len;
@@ -839,28 +877,10 @@ xfs_file_fallocate(
839 goto out_unlock; 877 goto out_unlock;
840 } 878 }
841 879
842 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
843 error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
844 if (error) {
845 xfs_trans_cancel(tp, 0);
846 goto out_unlock;
847 }
848
849 xfs_ilock(ip, XFS_ILOCK_EXCL);
850 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
851 ip->i_d.di_mode &= ~S_ISUID;
852 if (ip->i_d.di_mode & S_IXGRP)
853 ip->i_d.di_mode &= ~S_ISGID;
854
855 if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
856 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
857
858 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
859 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
860
861 if (file->f_flags & O_DSYNC) 880 if (file->f_flags & O_DSYNC)
862 xfs_trans_set_sync(tp); 881 flags |= XFS_PREALLOC_SYNC;
863 error = xfs_trans_commit(tp, 0); 882
883 error = xfs_update_prealloc_flags(ip, flags);
864 if (error) 884 if (error)
865 goto out_unlock; 885 goto out_unlock;
866 886
@@ -1384,5 +1404,4 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
1384 .fault = filemap_fault, 1404 .fault = filemap_fault,
1385 .map_pages = filemap_map_pages, 1405 .map_pages = filemap_map_pages,
1386 .page_mkwrite = xfs_vm_page_mkwrite, 1406 .page_mkwrite = xfs_vm_page_mkwrite,
1387 .remap_pages = generic_file_remap_pages,
1388}; 1407};
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index fdc64220fcb0..fba6532efba4 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -488,6 +488,7 @@ xfs_growfs_data_private(
488 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree); 488 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
489 if (dpct) 489 if (dpct)
490 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 490 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
491 xfs_trans_set_sync(tp);
491 error = xfs_trans_commit(tp, 0); 492 error = xfs_trans_commit(tp, 0);
492 if (error) 493 if (error)
493 return error; 494 return error;
@@ -541,7 +542,7 @@ xfs_growfs_data_private(
541 saved_error = error; 542 saved_error = error;
542 continue; 543 continue;
543 } 544 }
544 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); 545 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
545 546
546 error = xfs_bwrite(bp); 547 error = xfs_bwrite(bp);
547 xfs_buf_relse(bp); 548 xfs_buf_relse(bp);
@@ -756,37 +757,6 @@ out:
756 return 0; 757 return 0;
757} 758}
758 759
759/*
760 * Dump a transaction into the log that contains no real change. This is needed
761 * to be able to make the log dirty or stamp the current tail LSN into the log
762 * during the covering operation.
763 *
764 * We cannot use an inode here for this - that will push dirty state back up
765 * into the VFS and then periodic inode flushing will prevent log covering from
766 * making progress. Hence we log a field in the superblock instead and use a
767 * synchronous transaction to ensure the superblock is immediately unpinned
768 * and can be written back.
769 */
770int
771xfs_fs_log_dummy(
772 xfs_mount_t *mp)
773{
774 xfs_trans_t *tp;
775 int error;
776
777 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
778 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
779 if (error) {
780 xfs_trans_cancel(tp, 0);
781 return error;
782 }
783
784 /* log the UUID because it is an unchanging field */
785 xfs_mod_sb(tp, XFS_SB_UUID);
786 xfs_trans_set_sync(tp);
787 return xfs_trans_commit(tp, 0);
788}
789
790int 760int
791xfs_fs_goingdown( 761xfs_fs_goingdown(
792 xfs_mount_t *mp, 762 xfs_mount_t *mp,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 41f804e740d7..daafa1f6d260 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1995,6 +1995,7 @@ xfs_iunlink(
1995 agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 1995 agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
1996 offset = offsetof(xfs_agi_t, agi_unlinked) + 1996 offset = offsetof(xfs_agi_t, agi_unlinked) +
1997 (sizeof(xfs_agino_t) * bucket_index); 1997 (sizeof(xfs_agino_t) * bucket_index);
1998 xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
1998 xfs_trans_log_buf(tp, agibp, offset, 1999 xfs_trans_log_buf(tp, agibp, offset,
1999 (offset + sizeof(xfs_agino_t) - 1)); 2000 (offset + sizeof(xfs_agino_t) - 1));
2000 return 0; 2001 return 0;
@@ -2086,6 +2087,7 @@ xfs_iunlink_remove(
2086 agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 2087 agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
2087 offset = offsetof(xfs_agi_t, agi_unlinked) + 2088 offset = offsetof(xfs_agi_t, agi_unlinked) +
2088 (sizeof(xfs_agino_t) * bucket_index); 2089 (sizeof(xfs_agino_t) * bucket_index);
2090 xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
2089 xfs_trans_log_buf(tp, agibp, offset, 2091 xfs_trans_log_buf(tp, agibp, offset,
2090 (offset + sizeof(xfs_agino_t) - 1)); 2092 (offset + sizeof(xfs_agino_t) - 1));
2091 } else { 2093 } else {
@@ -2656,6 +2658,124 @@ xfs_sort_for_rename(
2656} 2658}
2657 2659
2658/* 2660/*
2661 * xfs_cross_rename()
2662 *
2663 * responsible for handling RENAME_EXCHANGE flag in renameat2() system call
2664 */
2665STATIC int
2666xfs_cross_rename(
2667 struct xfs_trans *tp,
2668 struct xfs_inode *dp1,
2669 struct xfs_name *name1,
2670 struct xfs_inode *ip1,
2671 struct xfs_inode *dp2,
2672 struct xfs_name *name2,
2673 struct xfs_inode *ip2,
2674 struct xfs_bmap_free *free_list,
2675 xfs_fsblock_t *first_block,
2676 int spaceres)
2677{
2678 int error = 0;
2679 int ip1_flags = 0;
2680 int ip2_flags = 0;
2681 int dp2_flags = 0;
2682
2683 /* Swap inode number for dirent in first parent */
2684 error = xfs_dir_replace(tp, dp1, name1,
2685 ip2->i_ino,
2686 first_block, free_list, spaceres);
2687 if (error)
2688 goto out;
2689
2690 /* Swap inode number for dirent in second parent */
2691 error = xfs_dir_replace(tp, dp2, name2,
2692 ip1->i_ino,
2693 first_block, free_list, spaceres);
2694 if (error)
2695 goto out;
2696
2697 /*
2698 * If we're renaming one or more directories across different parents,
2699 * update the respective ".." entries (and link counts) to match the new
2700 * parents.
2701 */
2702 if (dp1 != dp2) {
2703 dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2704
2705 if (S_ISDIR(ip2->i_d.di_mode)) {
2706 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
2707 dp1->i_ino, first_block,
2708 free_list, spaceres);
2709 if (error)
2710 goto out;
2711
2712 /* transfer ip2 ".." reference to dp1 */
2713 if (!S_ISDIR(ip1->i_d.di_mode)) {
2714 error = xfs_droplink(tp, dp2);
2715 if (error)
2716 goto out;
2717 error = xfs_bumplink(tp, dp1);
2718 if (error)
2719 goto out;
2720 }
2721
2722 /*
2723 * Although ip1 isn't changed here, userspace needs
2724 * to be warned about the change, so that applications
2725 * relying on it (like backup ones), will properly
2726 * notify the change
2727 */
2728 ip1_flags |= XFS_ICHGTIME_CHG;
2729 ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2730 }
2731
2732 if (S_ISDIR(ip1->i_d.di_mode)) {
2733 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
2734 dp2->i_ino, first_block,
2735 free_list, spaceres);
2736 if (error)
2737 goto out;
2738
2739 /* transfer ip1 ".." reference to dp2 */
2740 if (!S_ISDIR(ip2->i_d.di_mode)) {
2741 error = xfs_droplink(tp, dp1);
2742 if (error)
2743 goto out;
2744 error = xfs_bumplink(tp, dp2);
2745 if (error)
2746 goto out;
2747 }
2748
2749 /*
2750 * Although ip2 isn't changed here, userspace needs
2751 * to be warned about the change, so that applications
2752 * relying on it (like backup ones), will properly
2753 * notify the change
2754 */
2755 ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
2756 ip2_flags |= XFS_ICHGTIME_CHG;
2757 }
2758 }
2759
2760 if (ip1_flags) {
2761 xfs_trans_ichgtime(tp, ip1, ip1_flags);
2762 xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
2763 }
2764 if (ip2_flags) {
2765 xfs_trans_ichgtime(tp, ip2, ip2_flags);
2766 xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
2767 }
2768 if (dp2_flags) {
2769 xfs_trans_ichgtime(tp, dp2, dp2_flags);
2770 xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
2771 }
2772 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2773 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2774out:
2775 return error;
2776}
2777
2778/*
2659 * xfs_rename 2779 * xfs_rename
2660 */ 2780 */
2661int 2781int
@@ -2665,7 +2785,8 @@ xfs_rename(
2665 xfs_inode_t *src_ip, 2785 xfs_inode_t *src_ip,
2666 xfs_inode_t *target_dp, 2786 xfs_inode_t *target_dp,
2667 struct xfs_name *target_name, 2787 struct xfs_name *target_name,
2668 xfs_inode_t *target_ip) 2788 xfs_inode_t *target_ip,
2789 unsigned int flags)
2669{ 2790{
2670 xfs_trans_t *tp = NULL; 2791 xfs_trans_t *tp = NULL;
2671 xfs_mount_t *mp = src_dp->i_mount; 2792 xfs_mount_t *mp = src_dp->i_mount;
@@ -2743,6 +2864,18 @@ xfs_rename(
2743 } 2864 }
2744 2865
2745 /* 2866 /*
2867 * Handle RENAME_EXCHANGE flags
2868 */
2869 if (flags & RENAME_EXCHANGE) {
2870 error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
2871 target_dp, target_name, target_ip,
2872 &free_list, &first_block, spaceres);
2873 if (error)
2874 goto abort_return;
2875 goto finish_rename;
2876 }
2877
2878 /*
2746 * Set up the target. 2879 * Set up the target.
2747 */ 2880 */
2748 if (target_ip == NULL) { 2881 if (target_ip == NULL) {
@@ -2881,6 +3014,7 @@ xfs_rename(
2881 if (new_parent) 3014 if (new_parent)
2882 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 3015 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
2883 3016
3017finish_rename:
2884 /* 3018 /*
2885 * If this is a synchronous mount, make sure that the 3019 * If this is a synchronous mount, make sure that the
2886 * rename transaction goes to disk before returning to 3020 * rename transaction goes to disk before returning to
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 4ed2ba9342dc..86cd6b39bed7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -338,7 +338,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
338int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 338int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
339 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 339 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
340 struct xfs_name *target_name, 340 struct xfs_name *target_name,
341 struct xfs_inode *target_ip); 341 struct xfs_inode *target_ip, unsigned int flags);
342 342
343void xfs_ilock(xfs_inode_t *, uint); 343void xfs_ilock(xfs_inode_t *, uint);
344int xfs_ilock_nowait(xfs_inode_t *, uint); 344int xfs_ilock_nowait(xfs_inode_t *, uint);
@@ -377,6 +377,15 @@ int xfs_droplink(struct xfs_trans *, struct xfs_inode *);
377int xfs_bumplink(struct xfs_trans *, struct xfs_inode *); 377int xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
378 378
379/* from xfs_file.c */ 379/* from xfs_file.c */
380enum xfs_prealloc_flags {
381 XFS_PREALLOC_SET = (1 << 1),
382 XFS_PREALLOC_CLEAR = (1 << 2),
383 XFS_PREALLOC_SYNC = (1 << 3),
384 XFS_PREALLOC_INVISIBLE = (1 << 4),
385};
386
387int xfs_update_prealloc_flags(struct xfs_inode *,
388 enum xfs_prealloc_flags);
380int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 389int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
381int xfs_iozero(struct xfs_inode *, loff_t, size_t); 390int xfs_iozero(struct xfs_inode *, loff_t, size_t);
382 391
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a1831980a68e..f7afb86c9148 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -606,11 +606,8 @@ xfs_ioc_space(
606 unsigned int cmd, 606 unsigned int cmd,
607 xfs_flock64_t *bf) 607 xfs_flock64_t *bf)
608{ 608{
609 struct xfs_mount *mp = ip->i_mount;
610 struct xfs_trans *tp;
611 struct iattr iattr; 609 struct iattr iattr;
612 bool setprealloc = false; 610 enum xfs_prealloc_flags flags = 0;
613 bool clrprealloc = false;
614 int error; 611 int error;
615 612
616 /* 613 /*
@@ -630,6 +627,11 @@ xfs_ioc_space(
630 if (!S_ISREG(inode->i_mode)) 627 if (!S_ISREG(inode->i_mode))
631 return -EINVAL; 628 return -EINVAL;
632 629
630 if (filp->f_flags & O_DSYNC)
631 flags |= XFS_PREALLOC_SYNC;
632 if (ioflags & XFS_IO_INVIS)
633 flags |= XFS_PREALLOC_INVISIBLE;
634
633 error = mnt_want_write_file(filp); 635 error = mnt_want_write_file(filp);
634 if (error) 636 if (error)
635 return error; 637 return error;
@@ -673,25 +675,23 @@ xfs_ioc_space(
673 } 675 }
674 676
675 if (bf->l_start < 0 || 677 if (bf->l_start < 0 ||
676 bf->l_start > mp->m_super->s_maxbytes || 678 bf->l_start > inode->i_sb->s_maxbytes ||
677 bf->l_start + bf->l_len < 0 || 679 bf->l_start + bf->l_len < 0 ||
678 bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { 680 bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) {
679 error = -EINVAL; 681 error = -EINVAL;
680 goto out_unlock; 682 goto out_unlock;
681 } 683 }
682 684
683 switch (cmd) { 685 switch (cmd) {
684 case XFS_IOC_ZERO_RANGE: 686 case XFS_IOC_ZERO_RANGE:
687 flags |= XFS_PREALLOC_SET;
685 error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); 688 error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
686 if (!error)
687 setprealloc = true;
688 break; 689 break;
689 case XFS_IOC_RESVSP: 690 case XFS_IOC_RESVSP:
690 case XFS_IOC_RESVSP64: 691 case XFS_IOC_RESVSP64:
692 flags |= XFS_PREALLOC_SET;
691 error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, 693 error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
692 XFS_BMAPI_PREALLOC); 694 XFS_BMAPI_PREALLOC);
693 if (!error)
694 setprealloc = true;
695 break; 695 break;
696 case XFS_IOC_UNRESVSP: 696 case XFS_IOC_UNRESVSP:
697 case XFS_IOC_UNRESVSP64: 697 case XFS_IOC_UNRESVSP64:
@@ -701,6 +701,7 @@ xfs_ioc_space(
701 case XFS_IOC_ALLOCSP64: 701 case XFS_IOC_ALLOCSP64:
702 case XFS_IOC_FREESP: 702 case XFS_IOC_FREESP:
703 case XFS_IOC_FREESP64: 703 case XFS_IOC_FREESP64:
704 flags |= XFS_PREALLOC_CLEAR;
704 if (bf->l_start > XFS_ISIZE(ip)) { 705 if (bf->l_start > XFS_ISIZE(ip)) {
705 error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), 706 error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
706 bf->l_start - XFS_ISIZE(ip), 0); 707 bf->l_start - XFS_ISIZE(ip), 0);
@@ -712,8 +713,6 @@ xfs_ioc_space(
712 iattr.ia_size = bf->l_start; 713 iattr.ia_size = bf->l_start;
713 714
714 error = xfs_setattr_size(ip, &iattr); 715 error = xfs_setattr_size(ip, &iattr);
715 if (!error)
716 clrprealloc = true;
717 break; 716 break;
718 default: 717 default:
719 ASSERT(0); 718 ASSERT(0);
@@ -723,32 +722,7 @@ xfs_ioc_space(
723 if (error) 722 if (error)
724 goto out_unlock; 723 goto out_unlock;
725 724
726 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 725 error = xfs_update_prealloc_flags(ip, flags);
727 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
728 if (error) {
729 xfs_trans_cancel(tp, 0);
730 goto out_unlock;
731 }
732
733 xfs_ilock(ip, XFS_ILOCK_EXCL);
734 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
735
736 if (!(ioflags & XFS_IO_INVIS)) {
737 ip->i_d.di_mode &= ~S_ISUID;
738 if (ip->i_d.di_mode & S_IXGRP)
739 ip->i_d.di_mode &= ~S_ISGID;
740 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
741 }
742
743 if (setprealloc)
744 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
745 else if (clrprealloc)
746 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
747
748 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
749 if (filp->f_flags & O_DSYNC)
750 xfs_trans_set_sync(tp);
751 error = xfs_trans_commit(tp, 0);
752 726
753out_unlock: 727out_unlock:
754 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 728 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -1013,20 +987,182 @@ xfs_diflags_to_linux(
1013 inode->i_flags &= ~S_NOATIME; 987 inode->i_flags &= ~S_NOATIME;
1014} 988}
1015 989
1016#define FSX_PROJID 1 990static int
1017#define FSX_EXTSIZE 2 991xfs_ioctl_setattr_xflags(
1018#define FSX_XFLAGS 4 992 struct xfs_trans *tp,
1019#define FSX_NONBLOCK 8 993 struct xfs_inode *ip,
994 struct fsxattr *fa)
995{
996 struct xfs_mount *mp = ip->i_mount;
997
998 /* Can't change realtime flag if any extents are allocated. */
999 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1000 XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
1001 return -EINVAL;
1002
1003 /* If realtime flag is set then must have realtime device */
1004 if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
1005 if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
1006 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
1007 return -EINVAL;
1008 }
1009
1010 /*
1011 * Can't modify an immutable/append-only file unless
1012 * we have appropriate permission.
1013 */
1014 if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
1015 (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1016 !capable(CAP_LINUX_IMMUTABLE))
1017 return -EPERM;
1018
1019 xfs_set_diflags(ip, fa->fsx_xflags);
1020 xfs_diflags_to_linux(ip);
1021 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1022 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1023 XFS_STATS_INC(xs_ig_attrchg);
1024 return 0;
1025}
1026
1027/*
1028 * Set up the transaction structure for the setattr operation, checking that we
1029 * have permission to do so. On success, return a clean transaction and the
1030 * inode locked exclusively ready for further operation specific checks. On
1031 * failure, return an error without modifying or locking the inode.
1032 */
1033static struct xfs_trans *
1034xfs_ioctl_setattr_get_trans(
1035 struct xfs_inode *ip)
1036{
1037 struct xfs_mount *mp = ip->i_mount;
1038 struct xfs_trans *tp;
1039 int error;
1040
1041 if (mp->m_flags & XFS_MOUNT_RDONLY)
1042 return ERR_PTR(-EROFS);
1043 if (XFS_FORCED_SHUTDOWN(mp))
1044 return ERR_PTR(-EIO);
1045
1046 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
1047 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1048 if (error)
1049 goto out_cancel;
1050
1051 xfs_ilock(ip, XFS_ILOCK_EXCL);
1052 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1053
1054 /*
1055 * CAP_FOWNER overrides the following restrictions:
1056 *
1057 * The user ID of the calling process must be equal to the file owner
1058 * ID, except in cases where the CAP_FSETID capability is applicable.
1059 */
1060 if (!inode_owner_or_capable(VFS_I(ip))) {
1061 error = -EPERM;
1062 goto out_cancel;
1063 }
1064
1065 if (mp->m_flags & XFS_MOUNT_WSYNC)
1066 xfs_trans_set_sync(tp);
1067
1068 return tp;
1069
1070out_cancel:
1071 xfs_trans_cancel(tp, 0);
1072 return ERR_PTR(error);
1073}
1074
1075/*
1076 * extent size hint validation is somewhat cumbersome. Rules are:
1077 *
1078 * 1. extent size hint is only valid for directories and regular files
1079 * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
1080 * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
1081 * 4. can only be changed on regular files if no extents are allocated
1082 * 5. can be changed on directories at any time
1083 * 6. extsize hint of 0 turns off hints, clears inode flags.
1084 * 7. Extent size must be a multiple of the appropriate block size.
1085 * 8. for non-realtime files, the extent size hint must be limited
1086 * to half the AG size to avoid alignment extending the extent beyond the
1087 * limits of the AG.
1088 */
1089static int
1090xfs_ioctl_setattr_check_extsize(
1091 struct xfs_inode *ip,
1092 struct fsxattr *fa)
1093{
1094 struct xfs_mount *mp = ip->i_mount;
1095
1096 if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
1097 return -EINVAL;
1098
1099 if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
1100 !S_ISDIR(ip->i_d.di_mode))
1101 return -EINVAL;
1102
1103 if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents &&
1104 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
1105 return -EINVAL;
1106
1107 if (fa->fsx_extsize != 0) {
1108 xfs_extlen_t size;
1109 xfs_fsblock_t extsize_fsb;
1110
1111 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1112 if (extsize_fsb > MAXEXTLEN)
1113 return -EINVAL;
1114
1115 if (XFS_IS_REALTIME_INODE(ip) ||
1116 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1117 size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
1118 } else {
1119 size = mp->m_sb.sb_blocksize;
1120 if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
1121 return -EINVAL;
1122 }
1123
1124 if (fa->fsx_extsize % size)
1125 return -EINVAL;
1126 } else
1127 fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
1128
1129 return 0;
1130}
1131
1132static int
1133xfs_ioctl_setattr_check_projid(
1134 struct xfs_inode *ip,
1135 struct fsxattr *fa)
1136{
1137 /* Disallow 32bit project ids if projid32bit feature is not enabled. */
1138 if (fa->fsx_projid > (__uint16_t)-1 &&
1139 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
1140 return -EINVAL;
1141
1142 /*
1143 * Project Quota ID state is only allowed to change from within the init
1144 * namespace. Enforce that restriction only if we are trying to change
1145 * the quota ID state. Everything else is allowed in user namespaces.
1146 */
1147 if (current_user_ns() == &init_user_ns)
1148 return 0;
1149
1150 if (xfs_get_projid(ip) != fa->fsx_projid)
1151 return -EINVAL;
1152 if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
1153 (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
1154 return -EINVAL;
1155
1156 return 0;
1157}
1020 1158
1021STATIC int 1159STATIC int
1022xfs_ioctl_setattr( 1160xfs_ioctl_setattr(
1023 xfs_inode_t *ip, 1161 xfs_inode_t *ip,
1024 struct fsxattr *fa, 1162 struct fsxattr *fa)
1025 int mask)
1026{ 1163{
1027 struct xfs_mount *mp = ip->i_mount; 1164 struct xfs_mount *mp = ip->i_mount;
1028 struct xfs_trans *tp; 1165 struct xfs_trans *tp;
1029 unsigned int lock_flags = 0;
1030 struct xfs_dquot *udqp = NULL; 1166 struct xfs_dquot *udqp = NULL;
1031 struct xfs_dquot *pdqp = NULL; 1167 struct xfs_dquot *pdqp = NULL;
1032 struct xfs_dquot *olddquot = NULL; 1168 struct xfs_dquot *olddquot = NULL;
@@ -1034,17 +1170,9 @@ xfs_ioctl_setattr(
1034 1170
1035 trace_xfs_ioctl_setattr(ip); 1171 trace_xfs_ioctl_setattr(ip);
1036 1172
1037 if (mp->m_flags & XFS_MOUNT_RDONLY) 1173 code = xfs_ioctl_setattr_check_projid(ip, fa);
1038 return -EROFS; 1174 if (code)
1039 if (XFS_FORCED_SHUTDOWN(mp)) 1175 return code;
1040 return -EIO;
1041
1042 /*
1043 * Disallow 32bit project ids when projid32bit feature is not enabled.
1044 */
1045 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
1046 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
1047 return -EINVAL;
1048 1176
1049 /* 1177 /*
1050 * If disk quotas is on, we make sure that the dquots do exist on disk, 1178 * If disk quotas is on, we make sure that the dquots do exist on disk,
@@ -1054,7 +1182,7 @@ xfs_ioctl_setattr(
1054 * If the IDs do change before we take the ilock, we're covered 1182 * If the IDs do change before we take the ilock, we're covered
1055 * because the i_*dquot fields will get updated anyway. 1183 * because the i_*dquot fields will get updated anyway.
1056 */ 1184 */
1057 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { 1185 if (XFS_IS_QUOTA_ON(mp)) {
1058 code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, 1186 code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
1059 ip->i_d.di_gid, fa->fsx_projid, 1187 ip->i_d.di_gid, fa->fsx_projid,
1060 XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); 1188 XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp);
@@ -1062,175 +1190,49 @@ xfs_ioctl_setattr(
1062 return code; 1190 return code;
1063 } 1191 }
1064 1192
1065 /* 1193 tp = xfs_ioctl_setattr_get_trans(ip);
1066 * For the other attributes, we acquire the inode lock and 1194 if (IS_ERR(tp)) {
1067 * first do an error checking pass. 1195 code = PTR_ERR(tp);
1068 */ 1196 goto error_free_dquots;
1069 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
1070 code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1071 if (code)
1072 goto error_return;
1073
1074 lock_flags = XFS_ILOCK_EXCL;
1075 xfs_ilock(ip, lock_flags);
1076
1077 /*
1078 * CAP_FOWNER overrides the following restrictions:
1079 *
1080 * The user ID of the calling process must be equal
1081 * to the file owner ID, except in cases where the
1082 * CAP_FSETID capability is applicable.
1083 */
1084 if (!inode_owner_or_capable(VFS_I(ip))) {
1085 code = -EPERM;
1086 goto error_return;
1087 }
1088
1089 /*
1090 * Do a quota reservation only if projid is actually going to change.
1091 * Only allow changing of projid from init_user_ns since it is a
1092 * non user namespace aware identifier.
1093 */
1094 if (mask & FSX_PROJID) {
1095 if (current_user_ns() != &init_user_ns) {
1096 code = -EINVAL;
1097 goto error_return;
1098 }
1099
1100 if (XFS_IS_QUOTA_RUNNING(mp) &&
1101 XFS_IS_PQUOTA_ON(mp) &&
1102 xfs_get_projid(ip) != fa->fsx_projid) {
1103 ASSERT(tp);
1104 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL,
1105 pdqp, capable(CAP_FOWNER) ?
1106 XFS_QMOPT_FORCE_RES : 0);
1107 if (code) /* out of quota */
1108 goto error_return;
1109 }
1110 } 1197 }
1111 1198
1112 if (mask & FSX_EXTSIZE) {
1113 /*
1114 * Can't change extent size if any extents are allocated.
1115 */
1116 if (ip->i_d.di_nextents &&
1117 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
1118 fa->fsx_extsize)) {
1119 code = -EINVAL; /* EFBIG? */
1120 goto error_return;
1121 }
1122 1199
1123 /* 1200 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
1124 * Extent size must be a multiple of the appropriate block 1201 xfs_get_projid(ip) != fa->fsx_projid) {
1125 * size, if set at all. It must also be smaller than the 1202 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
1126 * maximum extent size supported by the filesystem. 1203 capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0);
1127 * 1204 if (code) /* out of quota */
1128 * Also, for non-realtime files, limit the extent size hint to 1205 goto error_trans_cancel;
1129 * half the size of the AGs in the filesystem so alignment
1130 * doesn't result in extents larger than an AG.
1131 */
1132 if (fa->fsx_extsize != 0) {
1133 xfs_extlen_t size;
1134 xfs_fsblock_t extsize_fsb;
1135
1136 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1137 if (extsize_fsb > MAXEXTLEN) {
1138 code = -EINVAL;
1139 goto error_return;
1140 }
1141
1142 if (XFS_IS_REALTIME_INODE(ip) ||
1143 ((mask & FSX_XFLAGS) &&
1144 (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
1145 size = mp->m_sb.sb_rextsize <<
1146 mp->m_sb.sb_blocklog;
1147 } else {
1148 size = mp->m_sb.sb_blocksize;
1149 if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
1150 code = -EINVAL;
1151 goto error_return;
1152 }
1153 }
1154
1155 if (fa->fsx_extsize % size) {
1156 code = -EINVAL;
1157 goto error_return;
1158 }
1159 }
1160 } 1206 }
1161 1207
1208 code = xfs_ioctl_setattr_check_extsize(ip, fa);
1209 if (code)
1210 goto error_trans_cancel;
1162 1211
1163 if (mask & FSX_XFLAGS) { 1212 code = xfs_ioctl_setattr_xflags(tp, ip, fa);
1164 /* 1213 if (code)
1165 * Can't change realtime flag if any extents are allocated. 1214 goto error_trans_cancel;
1166 */
1167 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1168 (XFS_IS_REALTIME_INODE(ip)) !=
1169 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1170 code = -EINVAL; /* EFBIG? */
1171 goto error_return;
1172 }
1173
1174 /*
1175 * If realtime flag is set then must have realtime data.
1176 */
1177 if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1178 if ((mp->m_sb.sb_rblocks == 0) ||
1179 (mp->m_sb.sb_rextsize == 0) ||
1180 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
1181 code = -EINVAL;
1182 goto error_return;
1183 }
1184 }
1185
1186 /*
1187 * Can't modify an immutable/append-only file unless
1188 * we have appropriate permission.
1189 */
1190 if ((ip->i_d.di_flags &
1191 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
1192 (fa->fsx_xflags &
1193 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1194 !capable(CAP_LINUX_IMMUTABLE)) {
1195 code = -EPERM;
1196 goto error_return;
1197 }
1198 }
1199
1200 xfs_trans_ijoin(tp, ip, 0);
1201 1215
1202 /* 1216 /*
1203 * Change file ownership. Must be the owner or privileged. 1217 * Change file ownership. Must be the owner or privileged. CAP_FSETID
1218 * overrides the following restrictions:
1219 *
1220 * The set-user-ID and set-group-ID bits of a file will be cleared upon
1221 * successful return from chown()
1204 */ 1222 */
1205 if (mask & FSX_PROJID) {
1206 /*
1207 * CAP_FSETID overrides the following restrictions:
1208 *
1209 * The set-user-ID and set-group-ID bits of a file will be
1210 * cleared upon successful return from chown()
1211 */
1212 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
1213 !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
1214 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
1215
1216 /*
1217 * Change the ownerships and register quota modifications
1218 * in the transaction.
1219 */
1220 if (xfs_get_projid(ip) != fa->fsx_projid) {
1221 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
1222 olddquot = xfs_qm_vop_chown(tp, ip,
1223 &ip->i_pdquot, pdqp);
1224 }
1225 ASSERT(ip->i_d.di_version > 1);
1226 xfs_set_projid(ip, fa->fsx_projid);
1227 }
1228 1223
1229 } 1224 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
1225 !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
1226 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
1230 1227
1231 if (mask & FSX_XFLAGS) { 1228 /* Change the ownerships and register project quota modifications */
1232 xfs_set_diflags(ip, fa->fsx_xflags); 1229 if (xfs_get_projid(ip) != fa->fsx_projid) {
1233 xfs_diflags_to_linux(ip); 1230 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
1231 olddquot = xfs_qm_vop_chown(tp, ip,
1232 &ip->i_pdquot, pdqp);
1233 }
1234 ASSERT(ip->i_d.di_version > 1);
1235 xfs_set_projid(ip, fa->fsx_projid);
1234 } 1236 }
1235 1237
1236 /* 1238 /*
@@ -1238,34 +1240,12 @@ xfs_ioctl_setattr(
1238 * extent size hint should be set on the inode. If no extent size flags 1240 * extent size hint should be set on the inode. If no extent size flags
1239 * are set on the inode then unconditionally clear the extent size hint. 1241 * are set on the inode then unconditionally clear the extent size hint.
1240 */ 1242 */
1241 if (mask & FSX_EXTSIZE) { 1243 if (ip->i_d.di_flags & (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT))
1242 int extsize = 0; 1244 ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1243 1245 else
1244 if (ip->i_d.di_flags & 1246 ip->i_d.di_extsize = 0;
1245 (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT))
1246 extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1247 ip->i_d.di_extsize = extsize;
1248 }
1249
1250 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1251 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1252
1253 XFS_STATS_INC(xs_ig_attrchg);
1254 1247
1255 /*
1256 * If this is a synchronous mount, make sure that the
1257 * transaction goes to disk before returning to the user.
1258 * This is slightly sub-optimal in that truncates require
1259 * two sync transactions instead of one for wsync filesystems.
1260 * One for the truncate and one for the timestamps since we
1261 * don't want to change the timestamps unless we're sure the
1262 * truncate worked. Truncates are less than 1% of the laddis
1263 * mix so this probably isn't worth the trouble to optimize.
1264 */
1265 if (mp->m_flags & XFS_MOUNT_WSYNC)
1266 xfs_trans_set_sync(tp);
1267 code = xfs_trans_commit(tp, 0); 1248 code = xfs_trans_commit(tp, 0);
1268 xfs_iunlock(ip, lock_flags);
1269 1249
1270 /* 1250 /*
1271 * Release any dquot(s) the inode had kept before chown. 1251 * Release any dquot(s) the inode had kept before chown.
@@ -1276,12 +1256,11 @@ xfs_ioctl_setattr(
1276 1256
1277 return code; 1257 return code;
1278 1258
1279 error_return: 1259error_trans_cancel:
1260 xfs_trans_cancel(tp, 0);
1261error_free_dquots:
1280 xfs_qm_dqrele(udqp); 1262 xfs_qm_dqrele(udqp);
1281 xfs_qm_dqrele(pdqp); 1263 xfs_qm_dqrele(pdqp);
1282 xfs_trans_cancel(tp, 0);
1283 if (lock_flags)
1284 xfs_iunlock(ip, lock_flags);
1285 return code; 1264 return code;
1286} 1265}
1287 1266
@@ -1292,20 +1271,15 @@ xfs_ioc_fssetxattr(
1292 void __user *arg) 1271 void __user *arg)
1293{ 1272{
1294 struct fsxattr fa; 1273 struct fsxattr fa;
1295 unsigned int mask;
1296 int error; 1274 int error;
1297 1275
1298 if (copy_from_user(&fa, arg, sizeof(fa))) 1276 if (copy_from_user(&fa, arg, sizeof(fa)))
1299 return -EFAULT; 1277 return -EFAULT;
1300 1278
1301 mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
1302 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1303 mask |= FSX_NONBLOCK;
1304
1305 error = mnt_want_write_file(filp); 1279 error = mnt_want_write_file(filp);
1306 if (error) 1280 if (error)
1307 return error; 1281 return error;
1308 error = xfs_ioctl_setattr(ip, &fa, mask); 1282 error = xfs_ioctl_setattr(ip, &fa);
1309 mnt_drop_write_file(filp); 1283 mnt_drop_write_file(filp);
1310 return error; 1284 return error;
1311} 1285}
@@ -1325,14 +1299,14 @@ xfs_ioc_getxflags(
1325 1299
1326STATIC int 1300STATIC int
1327xfs_ioc_setxflags( 1301xfs_ioc_setxflags(
1328 xfs_inode_t *ip, 1302 struct xfs_inode *ip,
1329 struct file *filp, 1303 struct file *filp,
1330 void __user *arg) 1304 void __user *arg)
1331{ 1305{
1306 struct xfs_trans *tp;
1332 struct fsxattr fa; 1307 struct fsxattr fa;
1333 unsigned int flags; 1308 unsigned int flags;
1334 unsigned int mask; 1309 int error;
1335 int error;
1336 1310
1337 if (copy_from_user(&flags, arg, sizeof(flags))) 1311 if (copy_from_user(&flags, arg, sizeof(flags)))
1338 return -EFAULT; 1312 return -EFAULT;
@@ -1342,15 +1316,26 @@ xfs_ioc_setxflags(
1342 FS_SYNC_FL)) 1316 FS_SYNC_FL))
1343 return -EOPNOTSUPP; 1317 return -EOPNOTSUPP;
1344 1318
1345 mask = FSX_XFLAGS;
1346 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1347 mask |= FSX_NONBLOCK;
1348 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); 1319 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
1349 1320
1350 error = mnt_want_write_file(filp); 1321 error = mnt_want_write_file(filp);
1351 if (error) 1322 if (error)
1352 return error; 1323 return error;
1353 error = xfs_ioctl_setattr(ip, &fa, mask); 1324
1325 tp = xfs_ioctl_setattr_get_trans(ip);
1326 if (IS_ERR(tp)) {
1327 error = PTR_ERR(tp);
1328 goto out_drop_write;
1329 }
1330
1331 error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
1332 if (error) {
1333 xfs_trans_cancel(tp, 0);
1334 goto out_drop_write;
1335 }
1336
1337 error = xfs_trans_commit(tp, 0);
1338out_drop_write:
1354 mnt_drop_write_file(filp); 1339 mnt_drop_write_file(filp);
1355 return error; 1340 return error;
1356} 1341}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index ec6772866f3d..bfc7c7c8a0c8 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -423,7 +423,7 @@ xfs_compat_attrmulti_by_handle(
423 423
424 ops = memdup_user(compat_ptr(am_hreq.ops), size); 424 ops = memdup_user(compat_ptr(am_hreq.ops), size);
425 if (IS_ERR(ops)) { 425 if (IS_ERR(ops)) {
426 error = -PTR_ERR(ops); 426 error = PTR_ERR(ops);
427 goto out_dput; 427 goto out_dput;
428 } 428 }
429 429
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c980e2a5086b..ccb1dd0d509e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -802,7 +802,7 @@ int
802xfs_iomap_write_unwritten( 802xfs_iomap_write_unwritten(
803 xfs_inode_t *ip, 803 xfs_inode_t *ip,
804 xfs_off_t offset, 804 xfs_off_t offset,
805 size_t count) 805 xfs_off_t count)
806{ 806{
807 xfs_mount_t *mp = ip->i_mount; 807 xfs_mount_t *mp = ip->i_mount;
808 xfs_fileoff_t offset_fsb; 808 xfs_fileoff_t offset_fsb;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 411fbb8919ef..8688e663d744 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,6 +27,6 @@ int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
27 struct xfs_bmbt_irec *); 27 struct xfs_bmbt_irec *);
28int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, 28int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
29 struct xfs_bmbt_irec *); 29 struct xfs_bmbt_irec *);
30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); 30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
31 31
32#endif /* __XFS_IOMAP_H__*/ 32#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c50311cae1b1..ce80eeb8faa4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -380,18 +380,27 @@ xfs_vn_rename(
380 struct inode *odir, 380 struct inode *odir,
381 struct dentry *odentry, 381 struct dentry *odentry,
382 struct inode *ndir, 382 struct inode *ndir,
383 struct dentry *ndentry) 383 struct dentry *ndentry,
384 unsigned int flags)
384{ 385{
385 struct inode *new_inode = ndentry->d_inode; 386 struct inode *new_inode = ndentry->d_inode;
387 int omode = 0;
386 struct xfs_name oname; 388 struct xfs_name oname;
387 struct xfs_name nname; 389 struct xfs_name nname;
388 390
389 xfs_dentry_to_name(&oname, odentry, 0); 391 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
392 return -EINVAL;
393
394 /* if we are exchanging files, we need to set i_mode of both files */
395 if (flags & RENAME_EXCHANGE)
396 omode = ndentry->d_inode->i_mode;
397
398 xfs_dentry_to_name(&oname, odentry, omode);
390 xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); 399 xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
391 400
392 return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 401 return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
393 XFS_I(ndir), &nname, new_inode ? 402 XFS_I(ndir), &nname,
394 XFS_I(new_inode) : NULL); 403 new_inode ? XFS_I(new_inode) : NULL, flags);
395} 404}
396 405
397/* 406/*
@@ -1144,7 +1153,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
1144 */ 1153 */
1145 .rmdir = xfs_vn_unlink, 1154 .rmdir = xfs_vn_unlink,
1146 .mknod = xfs_vn_mknod, 1155 .mknod = xfs_vn_mknod,
1147 .rename = xfs_vn_rename, 1156 .rename2 = xfs_vn_rename,
1148 .get_acl = xfs_get_acl, 1157 .get_acl = xfs_get_acl,
1149 .set_acl = xfs_set_acl, 1158 .set_acl = xfs_set_acl,
1150 .getattr = xfs_vn_getattr, 1159 .getattr = xfs_vn_getattr,
@@ -1172,7 +1181,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
1172 */ 1181 */
1173 .rmdir = xfs_vn_unlink, 1182 .rmdir = xfs_vn_unlink,
1174 .mknod = xfs_vn_mknod, 1183 .mknod = xfs_vn_mknod,
1175 .rename = xfs_vn_rename, 1184 .rename2 = xfs_vn_rename,
1176 .get_acl = xfs_get_acl, 1185 .get_acl = xfs_get_acl,
1177 .set_acl = xfs_set_acl, 1186 .set_acl = xfs_set_acl,
1178 .getattr = xfs_vn_getattr, 1187 .getattr = xfs_vn_getattr,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e408bf5a3ff7..bcc7cfabb787 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -33,6 +33,7 @@
33#include "xfs_fsops.h" 33#include "xfs_fsops.h"
34#include "xfs_cksum.h" 34#include "xfs_cksum.h"
35#include "xfs_sysfs.h" 35#include "xfs_sysfs.h"
36#include "xfs_sb.h"
36 37
37kmem_zone_t *xfs_log_ticket_zone; 38kmem_zone_t *xfs_log_ticket_zone;
38 39
@@ -1290,9 +1291,20 @@ xfs_log_worker(
1290 struct xfs_mount *mp = log->l_mp; 1291 struct xfs_mount *mp = log->l_mp;
1291 1292
1292 /* dgc: errors ignored - not fatal and nowhere to report them */ 1293 /* dgc: errors ignored - not fatal and nowhere to report them */
1293 if (xfs_log_need_covered(mp)) 1294 if (xfs_log_need_covered(mp)) {
1294 xfs_fs_log_dummy(mp); 1295 /*
1295 else 1296 * Dump a transaction into the log that contains no real change.
1297 * This is needed to stamp the current tail LSN into the log
1298 * during the covering operation.
1299 *
1300 * We cannot use an inode here for this - that will push dirty
1301 * state back up into the VFS and then periodic inode flushing
1302 * will prevent log covering from making progress. Hence we
1303 * synchronously log the superblock instead to ensure the
1304 * superblock is immediately unpinned and can be written back.
1305 */
1306 xfs_sync_sb(mp, true);
1307 } else
1296 xfs_log_force(mp, 0); 1308 xfs_log_force(mp, 0);
1297 1309
1298 /* start pushing all the metadata that is currently dirty */ 1310 /* start pushing all the metadata that is currently dirty */
@@ -1395,6 +1407,8 @@ xlog_alloc_log(
1395 ASSERT(xfs_buf_islocked(bp)); 1407 ASSERT(xfs_buf_islocked(bp));
1396 xfs_buf_unlock(bp); 1408 xfs_buf_unlock(bp);
1397 1409
1410 /* use high priority wq for log I/O completion */
1411 bp->b_ioend_wq = mp->m_log_workqueue;
1398 bp->b_iodone = xlog_iodone; 1412 bp->b_iodone = xlog_iodone;
1399 log->l_xbuf = bp; 1413 log->l_xbuf = bp;
1400 1414
@@ -1427,6 +1441,8 @@ xlog_alloc_log(
1427 ASSERT(xfs_buf_islocked(bp)); 1441 ASSERT(xfs_buf_islocked(bp));
1428 xfs_buf_unlock(bp); 1442 xfs_buf_unlock(bp);
1429 1443
1444 /* use high priority wq for log I/O completion */
1445 bp->b_ioend_wq = mp->m_log_workqueue;
1430 bp->b_iodone = xlog_iodone; 1446 bp->b_iodone = xlog_iodone;
1431 iclog->ic_bp = bp; 1447 iclog->ic_bp = bp;
1432 iclog->ic_data = bp->b_addr; 1448 iclog->ic_data = bp->b_addr;
@@ -1806,8 +1822,6 @@ xlog_sync(
1806 XFS_BUF_ZEROFLAGS(bp); 1822 XFS_BUF_ZEROFLAGS(bp);
1807 XFS_BUF_ASYNC(bp); 1823 XFS_BUF_ASYNC(bp);
1808 bp->b_flags |= XBF_SYNCIO; 1824 bp->b_flags |= XBF_SYNCIO;
1809 /* use high priority completion wq */
1810 bp->b_ioend_wq = log->l_mp->m_log_workqueue;
1811 1825
1812 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { 1826 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1813 bp->b_flags |= XBF_FUA; 1827 bp->b_flags |= XBF_FUA;
@@ -1856,8 +1870,6 @@ xlog_sync(
1856 bp->b_flags |= XBF_SYNCIO; 1870 bp->b_flags |= XBF_SYNCIO;
1857 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1871 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1858 bp->b_flags |= XBF_FUA; 1872 bp->b_flags |= XBF_FUA;
1859 /* use high priority completion wq */
1860 bp->b_ioend_wq = log->l_mp->m_log_workqueue;
1861 1873
1862 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1874 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1863 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1875 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -2027,7 +2039,7 @@ xlog_print_tic_res(
2027 " total reg = %u bytes (o/flow = %u bytes)\n" 2039 " total reg = %u bytes (o/flow = %u bytes)\n"
2028 " ophdrs = %u (ophdr space = %u bytes)\n" 2040 " ophdrs = %u (ophdr space = %u bytes)\n"
2029 " ophdr + reg = %u bytes\n" 2041 " ophdr + reg = %u bytes\n"
2030 " num regions = %u\n", 2042 " num regions = %u",
2031 ((ticket->t_trans_type <= 0 || 2043 ((ticket->t_trans_type <= 0 ||
2032 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? 2044 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
2033 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), 2045 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d3d38836f87f..4fa80e63eea2 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -408,11 +408,11 @@ xfs_update_alignment(xfs_mount_t *mp)
408 if (xfs_sb_version_hasdalign(sbp)) { 408 if (xfs_sb_version_hasdalign(sbp)) {
409 if (sbp->sb_unit != mp->m_dalign) { 409 if (sbp->sb_unit != mp->m_dalign) {
410 sbp->sb_unit = mp->m_dalign; 410 sbp->sb_unit = mp->m_dalign;
411 mp->m_update_flags |= XFS_SB_UNIT; 411 mp->m_update_sb = true;
412 } 412 }
413 if (sbp->sb_width != mp->m_swidth) { 413 if (sbp->sb_width != mp->m_swidth) {
414 sbp->sb_width = mp->m_swidth; 414 sbp->sb_width = mp->m_swidth;
415 mp->m_update_flags |= XFS_SB_WIDTH; 415 mp->m_update_sb = true;
416 } 416 }
417 } else { 417 } else {
418 xfs_warn(mp, 418 xfs_warn(mp,
@@ -583,38 +583,19 @@ int
583xfs_mount_reset_sbqflags( 583xfs_mount_reset_sbqflags(
584 struct xfs_mount *mp) 584 struct xfs_mount *mp)
585{ 585{
586 int error;
587 struct xfs_trans *tp;
588
589 mp->m_qflags = 0; 586 mp->m_qflags = 0;
590 587
591 /* 588 /* It is OK to look at sb_qflags in the mount path without m_sb_lock. */
592 * It is OK to look at sb_qflags here in mount path,
593 * without m_sb_lock.
594 */
595 if (mp->m_sb.sb_qflags == 0) 589 if (mp->m_sb.sb_qflags == 0)
596 return 0; 590 return 0;
597 spin_lock(&mp->m_sb_lock); 591 spin_lock(&mp->m_sb_lock);
598 mp->m_sb.sb_qflags = 0; 592 mp->m_sb.sb_qflags = 0;
599 spin_unlock(&mp->m_sb_lock); 593 spin_unlock(&mp->m_sb_lock);
600 594
601 /* 595 if (!xfs_fs_writable(mp, SB_FREEZE_WRITE))
602 * If the fs is readonly, let the incore superblock run
603 * with quotas off but don't flush the update out to disk
604 */
605 if (mp->m_flags & XFS_MOUNT_RDONLY)
606 return 0; 596 return 0;
607 597
608 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 598 return xfs_sync_sb(mp, false);
609 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
610 if (error) {
611 xfs_trans_cancel(tp, 0);
612 xfs_alert(mp, "%s: Superblock update failed!", __func__);
613 return error;
614 }
615
616 xfs_mod_sb(tp, XFS_SB_QFLAGS);
617 return xfs_trans_commit(tp, 0);
618} 599}
619 600
620__uint64_t 601__uint64_t
@@ -659,26 +640,25 @@ xfs_mountfs(
659 xfs_sb_mount_common(mp, sbp); 640 xfs_sb_mount_common(mp, sbp);
660 641
661 /* 642 /*
662 * Check for a mismatched features2 values. Older kernels 643 * Check for a mismatched features2 values. Older kernels read & wrote
663 * read & wrote into the wrong sb offset for sb_features2 644 * into the wrong sb offset for sb_features2 on some platforms due to
664 * on some platforms due to xfs_sb_t not being 64bit size aligned 645 * xfs_sb_t not being 64bit size aligned when sb_features2 was added,
665 * when sb_features2 was added, which made older superblock 646 * which made older superblock reading/writing routines swap it as a
666 * reading/writing routines swap it as a 64-bit value. 647 * 64-bit value.
667 * 648 *
668 * For backwards compatibility, we make both slots equal. 649 * For backwards compatibility, we make both slots equal.
669 * 650 *
670 * If we detect a mismatched field, we OR the set bits into the 651 * If we detect a mismatched field, we OR the set bits into the existing
671 * existing features2 field in case it has already been modified; we 652 * features2 field in case it has already been modified; we don't want
672 * don't want to lose any features. We then update the bad location 653 * to lose any features. We then update the bad location with the ORed
673 * with the ORed value so that older kernels will see any features2 654 * value so that older kernels will see any features2 flags. The
674 * flags, and mark the two fields as needing updates once the 655 * superblock writeback code ensures the new sb_features2 is copied to
675 * transaction subsystem is online. 656 * sb_bad_features2 before it is logged or written to disk.
676 */ 657 */
677 if (xfs_sb_has_mismatched_features2(sbp)) { 658 if (xfs_sb_has_mismatched_features2(sbp)) {
678 xfs_warn(mp, "correcting sb_features alignment problem"); 659 xfs_warn(mp, "correcting sb_features alignment problem");
679 sbp->sb_features2 |= sbp->sb_bad_features2; 660 sbp->sb_features2 |= sbp->sb_bad_features2;
680 sbp->sb_bad_features2 = sbp->sb_features2; 661 mp->m_update_sb = true;
681 mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
682 662
683 /* 663 /*
684 * Re-check for ATTR2 in case it was found in bad_features2 664 * Re-check for ATTR2 in case it was found in bad_features2
@@ -692,17 +672,17 @@ xfs_mountfs(
692 if (xfs_sb_version_hasattr2(&mp->m_sb) && 672 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
693 (mp->m_flags & XFS_MOUNT_NOATTR2)) { 673 (mp->m_flags & XFS_MOUNT_NOATTR2)) {
694 xfs_sb_version_removeattr2(&mp->m_sb); 674 xfs_sb_version_removeattr2(&mp->m_sb);
695 mp->m_update_flags |= XFS_SB_FEATURES2; 675 mp->m_update_sb = true;
696 676
697 /* update sb_versionnum for the clearing of the morebits */ 677 /* update sb_versionnum for the clearing of the morebits */
698 if (!sbp->sb_features2) 678 if (!sbp->sb_features2)
699 mp->m_update_flags |= XFS_SB_VERSIONNUM; 679 mp->m_update_sb = true;
700 } 680 }
701 681
702 /* always use v2 inodes by default now */ 682 /* always use v2 inodes by default now */
703 if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { 683 if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
704 mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; 684 mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
705 mp->m_update_flags |= XFS_SB_VERSIONNUM; 685 mp->m_update_sb = true;
706 } 686 }
707 687
708 /* 688 /*
@@ -895,8 +875,8 @@ xfs_mountfs(
895 * the next remount into writeable mode. Otherwise we would never 875 * the next remount into writeable mode. Otherwise we would never
896 * perform the update e.g. for the root filesystem. 876 * perform the update e.g. for the root filesystem.
897 */ 877 */
898 if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { 878 if (mp->m_update_sb && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
899 error = xfs_mount_log_sb(mp, mp->m_update_flags); 879 error = xfs_sync_sb(mp, false);
900 if (error) { 880 if (error) {
901 xfs_warn(mp, "failed to write sb changes"); 881 xfs_warn(mp, "failed to write sb changes");
902 goto out_rtunmount; 882 goto out_rtunmount;
@@ -1103,9 +1083,6 @@ xfs_fs_writable(
1103int 1083int
1104xfs_log_sbcount(xfs_mount_t *mp) 1084xfs_log_sbcount(xfs_mount_t *mp)
1105{ 1085{
1106 xfs_trans_t *tp;
1107 int error;
1108
1109 /* allow this to proceed during the freeze sequence... */ 1086 /* allow this to proceed during the freeze sequence... */
1110 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) 1087 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
1111 return 0; 1088 return 0;
@@ -1119,17 +1096,7 @@ xfs_log_sbcount(xfs_mount_t *mp)
1119 if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) 1096 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1120 return 0; 1097 return 0;
1121 1098
1122 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1099 return xfs_sync_sb(mp, true);
1123 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
1124 if (error) {
1125 xfs_trans_cancel(tp, 0);
1126 return error;
1127 }
1128
1129 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1130 xfs_trans_set_sync(tp);
1131 error = xfs_trans_commit(tp, 0);
1132 return error;
1133} 1100}
1134 1101
1135/* 1102/*
@@ -1423,34 +1390,6 @@ xfs_freesb(
1423} 1390}
1424 1391
1425/* 1392/*
1426 * Used to log changes to the superblock unit and width fields which could
1427 * be altered by the mount options, as well as any potential sb_features2
1428 * fixup. Only the first superblock is updated.
1429 */
1430int
1431xfs_mount_log_sb(
1432 xfs_mount_t *mp,
1433 __int64_t fields)
1434{
1435 xfs_trans_t *tp;
1436 int error;
1437
1438 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
1439 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
1440 XFS_SB_VERSIONNUM));
1441
1442 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1443 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
1444 if (error) {
1445 xfs_trans_cancel(tp, 0);
1446 return error;
1447 }
1448 xfs_mod_sb(tp, fields);
1449 error = xfs_trans_commit(tp, 0);
1450 return error;
1451}
1452
1453/*
1454 * If the underlying (data/log/rt) device is readonly, there are some 1393 * If the underlying (data/log/rt) device is readonly, there are some
1455 * operations that cannot proceed. 1394 * operations that cannot proceed.
1456 */ 1395 */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 22ccf69d4d3c..a5b2ff822653 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -162,8 +162,7 @@ typedef struct xfs_mount {
162 struct delayed_work m_reclaim_work; /* background inode reclaim */ 162 struct delayed_work m_reclaim_work; /* background inode reclaim */
163 struct delayed_work m_eofblocks_work; /* background eof blocks 163 struct delayed_work m_eofblocks_work; /* background eof blocks
164 trimming */ 164 trimming */
165 __int64_t m_update_flags; /* sb flags we need to update 165 bool m_update_sb; /* sb needs update in mount */
166 on the next remount,rw */
167 int64_t m_low_space[XFS_LOWSP_MAX]; 166 int64_t m_low_space[XFS_LOWSP_MAX];
168 /* low free space thresholds */ 167 /* low free space thresholds */
169 struct xfs_kobj m_kobj; 168 struct xfs_kobj m_kobj;
@@ -378,7 +377,7 @@ extern void xfs_unmountfs(xfs_mount_t *);
378extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 377extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
379extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 378extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
380 uint, int); 379 uint, int);
381extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 380extern int xfs_mount_log_sb(xfs_mount_t *);
382extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 381extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
383extern int xfs_readsb(xfs_mount_t *, int); 382extern int xfs_readsb(xfs_mount_t *, int);
384extern void xfs_freesb(xfs_mount_t *); 383extern void xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 79fb19dd9c83..3e8186279541 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -714,7 +714,6 @@ STATIC int
714xfs_qm_qino_alloc( 714xfs_qm_qino_alloc(
715 xfs_mount_t *mp, 715 xfs_mount_t *mp,
716 xfs_inode_t **ip, 716 xfs_inode_t **ip,
717 __int64_t sbfields,
718 uint flags) 717 uint flags)
719{ 718{
720 xfs_trans_t *tp; 719 xfs_trans_t *tp;
@@ -777,11 +776,6 @@ xfs_qm_qino_alloc(
777 spin_lock(&mp->m_sb_lock); 776 spin_lock(&mp->m_sb_lock);
778 if (flags & XFS_QMOPT_SBVERSION) { 777 if (flags & XFS_QMOPT_SBVERSION) {
779 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); 778 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
780 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
781 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) ==
782 (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
783 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
784 XFS_SB_QFLAGS));
785 779
786 xfs_sb_version_addquota(&mp->m_sb); 780 xfs_sb_version_addquota(&mp->m_sb);
787 mp->m_sb.sb_uquotino = NULLFSINO; 781 mp->m_sb.sb_uquotino = NULLFSINO;
@@ -798,7 +792,7 @@ xfs_qm_qino_alloc(
798 else 792 else
799 mp->m_sb.sb_pquotino = (*ip)->i_ino; 793 mp->m_sb.sb_pquotino = (*ip)->i_ino;
800 spin_unlock(&mp->m_sb_lock); 794 spin_unlock(&mp->m_sb_lock);
801 xfs_mod_sb(tp, sbfields); 795 xfs_log_sb(tp);
802 796
803 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 797 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
804 xfs_alert(mp, "%s failed (error %d)!", __func__, error); 798 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
@@ -1451,7 +1445,7 @@ xfs_qm_mount_quotas(
1451 spin_unlock(&mp->m_sb_lock); 1445 spin_unlock(&mp->m_sb_lock);
1452 1446
1453 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { 1447 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
1454 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { 1448 if (xfs_sync_sb(mp, false)) {
1455 /* 1449 /*
1456 * We could only have been turning quotas off. 1450 * We could only have been turning quotas off.
1457 * We aren't in very good shape actually because 1451 * We aren't in very good shape actually because
@@ -1482,7 +1476,6 @@ xfs_qm_init_quotainos(
1482 struct xfs_inode *gip = NULL; 1476 struct xfs_inode *gip = NULL;
1483 struct xfs_inode *pip = NULL; 1477 struct xfs_inode *pip = NULL;
1484 int error; 1478 int error;
1485 __int64_t sbflags = 0;
1486 uint flags = 0; 1479 uint flags = 0;
1487 1480
1488 ASSERT(mp->m_quotainfo); 1481 ASSERT(mp->m_quotainfo);
@@ -1517,9 +1510,6 @@ xfs_qm_init_quotainos(
1517 } 1510 }
1518 } else { 1511 } else {
1519 flags |= XFS_QMOPT_SBVERSION; 1512 flags |= XFS_QMOPT_SBVERSION;
1520 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1521 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
1522 XFS_SB_QFLAGS);
1523 } 1513 }
1524 1514
1525 /* 1515 /*
@@ -1530,7 +1520,6 @@ xfs_qm_init_quotainos(
1530 */ 1520 */
1531 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { 1521 if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1532 error = xfs_qm_qino_alloc(mp, &uip, 1522 error = xfs_qm_qino_alloc(mp, &uip,
1533 sbflags | XFS_SB_UQUOTINO,
1534 flags | XFS_QMOPT_UQUOTA); 1523 flags | XFS_QMOPT_UQUOTA);
1535 if (error) 1524 if (error)
1536 goto error_rele; 1525 goto error_rele;
@@ -1539,7 +1528,6 @@ xfs_qm_init_quotainos(
1539 } 1528 }
1540 if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { 1529 if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
1541 error = xfs_qm_qino_alloc(mp, &gip, 1530 error = xfs_qm_qino_alloc(mp, &gip,
1542 sbflags | XFS_SB_GQUOTINO,
1543 flags | XFS_QMOPT_GQUOTA); 1531 flags | XFS_QMOPT_GQUOTA);
1544 if (error) 1532 if (error)
1545 goto error_rele; 1533 goto error_rele;
@@ -1548,7 +1536,6 @@ xfs_qm_init_quotainos(
1548 } 1536 }
1549 if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { 1537 if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
1550 error = xfs_qm_qino_alloc(mp, &pip, 1538 error = xfs_qm_qino_alloc(mp, &pip,
1551 sbflags | XFS_SB_PQUOTINO,
1552 flags | XFS_QMOPT_PQUOTA); 1539 flags | XFS_QMOPT_PQUOTA);
1553 if (error) 1540 if (error)
1554 goto error_rele; 1541 goto error_rele;
@@ -1587,32 +1574,6 @@ xfs_qm_dqfree_one(
1587 xfs_qm_dqdestroy(dqp); 1574 xfs_qm_dqdestroy(dqp);
1588} 1575}
1589 1576
1590/*
1591 * Start a transaction and write the incore superblock changes to
1592 * disk. flags parameter indicates which fields have changed.
1593 */
1594int
1595xfs_qm_write_sb_changes(
1596 xfs_mount_t *mp,
1597 __int64_t flags)
1598{
1599 xfs_trans_t *tp;
1600 int error;
1601
1602 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1603 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
1604 if (error) {
1605 xfs_trans_cancel(tp, 0);
1606 return error;
1607 }
1608
1609 xfs_mod_sb(tp, flags);
1610 error = xfs_trans_commit(tp, 0);
1611
1612 return error;
1613}
1614
1615
1616/* --------------- utility functions for vnodeops ---------------- */ 1577/* --------------- utility functions for vnodeops ---------------- */
1617 1578
1618 1579
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 3a07a937e232..0d4d3590cf85 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -157,7 +157,6 @@ struct xfs_dquot_acct {
157#define XFS_QM_RTBWARNLIMIT 5 157#define XFS_QM_RTBWARNLIMIT 5
158 158
159extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); 159extern void xfs_qm_destroy_quotainfo(struct xfs_mount *);
160extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t);
161 160
162/* dquot stuff */ 161/* dquot stuff */
163extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); 162extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint);
@@ -166,9 +165,9 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
166/* quota ops */ 165/* quota ops */
167extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); 166extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
168extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, 167extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
169 uint, struct fs_disk_quota *); 168 uint, struct qc_dqblk *);
170extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, 169extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
171 struct fs_disk_quota *); 170 struct qc_dqblk *);
172extern int xfs_qm_scall_getqstat(struct xfs_mount *, 171extern int xfs_qm_scall_getqstat(struct xfs_mount *,
173 struct fs_quota_stat *); 172 struct fs_quota_stat *);
174extern int xfs_qm_scall_getqstatv(struct xfs_mount *, 173extern int xfs_qm_scall_getqstatv(struct xfs_mount *,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 74fca68e43b6..9b965db45800 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -39,7 +39,6 @@ STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
39STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 39STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
40 uint); 40 uint);
41STATIC uint xfs_qm_export_flags(uint); 41STATIC uint xfs_qm_export_flags(uint);
42STATIC uint xfs_qm_export_qtype_flags(uint);
43 42
44/* 43/*
45 * Turn off quota accounting and/or enforcement for all udquots and/or 44 * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -92,8 +91,7 @@ xfs_qm_scall_quotaoff(
92 mutex_unlock(&q->qi_quotaofflock); 91 mutex_unlock(&q->qi_quotaofflock);
93 92
94 /* XXX what to do if error ? Revert back to old vals incore ? */ 93 /* XXX what to do if error ? Revert back to old vals incore ? */
95 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); 94 return xfs_sync_sb(mp, false);
96 return error;
97 } 95 }
98 96
99 dqtype = 0; 97 dqtype = 0;
@@ -314,7 +312,6 @@ xfs_qm_scall_quotaon(
314{ 312{
315 int error; 313 int error;
316 uint qf; 314 uint qf;
317 __int64_t sbflags;
318 315
319 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 316 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
320 /* 317 /*
@@ -322,30 +319,22 @@ xfs_qm_scall_quotaon(
322 */ 319 */
323 flags &= ~(XFS_ALL_QUOTA_ACCT); 320 flags &= ~(XFS_ALL_QUOTA_ACCT);
324 321
325 sbflags = 0;
326
327 if (flags == 0) { 322 if (flags == 0) {
328 xfs_debug(mp, "%s: zero flags, m_qflags=%x", 323 xfs_debug(mp, "%s: zero flags, m_qflags=%x",
329 __func__, mp->m_qflags); 324 __func__, mp->m_qflags);
330 return -EINVAL; 325 return -EINVAL;
331 } 326 }
332 327
333 /* No fs can turn on quotas with a delayed effect */
334 ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
335
336 /* 328 /*
337 * Can't enforce without accounting. We check the superblock 329 * Can't enforce without accounting. We check the superblock
338 * qflags here instead of m_qflags because rootfs can have 330 * qflags here instead of m_qflags because rootfs can have
339 * quota acct on ondisk without m_qflags' knowing. 331 * quota acct on ondisk without m_qflags' knowing.
340 */ 332 */
341 if (((flags & XFS_UQUOTA_ACCT) == 0 && 333 if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
342 (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
343 (flags & XFS_UQUOTA_ENFD)) || 334 (flags & XFS_UQUOTA_ENFD)) ||
344 ((flags & XFS_GQUOTA_ACCT) == 0 && 335 ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
345 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
346 (flags & XFS_GQUOTA_ENFD)) || 336 (flags & XFS_GQUOTA_ENFD)) ||
347 ((flags & XFS_PQUOTA_ACCT) == 0 && 337 ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
348 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
349 (flags & XFS_PQUOTA_ENFD))) { 338 (flags & XFS_PQUOTA_ENFD))) {
350 xfs_debug(mp, 339 xfs_debug(mp,
351 "%s: Can't enforce without acct, flags=%x sbflags=%x", 340 "%s: Can't enforce without acct, flags=%x sbflags=%x",
@@ -370,11 +359,11 @@ xfs_qm_scall_quotaon(
370 /* 359 /*
371 * There's nothing to change if it's the same. 360 * There's nothing to change if it's the same.
372 */ 361 */
373 if ((qf & flags) == flags && sbflags == 0) 362 if ((qf & flags) == flags)
374 return -EEXIST; 363 return -EEXIST;
375 sbflags |= XFS_SB_QFLAGS;
376 364
377 if ((error = xfs_qm_write_sb_changes(mp, sbflags))) 365 error = xfs_sync_sb(mp, false);
366 if (error)
378 return error; 367 return error;
379 /* 368 /*
380 * If we aren't trying to switch on quota enforcement, we are done. 369 * If we aren't trying to switch on quota enforcement, we are done.
@@ -384,8 +373,7 @@ xfs_qm_scall_quotaon(
384 ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != 373 ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
385 (mp->m_qflags & XFS_PQUOTA_ACCT)) || 374 (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
386 ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != 375 ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
387 (mp->m_qflags & XFS_GQUOTA_ACCT)) || 376 (mp->m_qflags & XFS_GQUOTA_ACCT)))
388 (flags & XFS_ALL_QUOTA_ENFD) == 0)
389 return 0; 377 return 0;
390 378
391 if (! XFS_IS_QUOTA_RUNNING(mp)) 379 if (! XFS_IS_QUOTA_RUNNING(mp))
@@ -422,20 +410,12 @@ xfs_qm_scall_getqstat(
422 memset(out, 0, sizeof(fs_quota_stat_t)); 410 memset(out, 0, sizeof(fs_quota_stat_t));
423 411
424 out->qs_version = FS_QSTAT_VERSION; 412 out->qs_version = FS_QSTAT_VERSION;
425 if (!xfs_sb_version_hasquota(&mp->m_sb)) {
426 out->qs_uquota.qfs_ino = NULLFSINO;
427 out->qs_gquota.qfs_ino = NULLFSINO;
428 return 0;
429 }
430
431 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & 413 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
432 (XFS_ALL_QUOTA_ACCT| 414 (XFS_ALL_QUOTA_ACCT|
433 XFS_ALL_QUOTA_ENFD)); 415 XFS_ALL_QUOTA_ENFD));
434 if (q) { 416 uip = q->qi_uquotaip;
435 uip = q->qi_uquotaip; 417 gip = q->qi_gquotaip;
436 gip = q->qi_gquotaip; 418 pip = q->qi_pquotaip;
437 pip = q->qi_pquotaip;
438 }
439 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 419 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 420 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
441 0, 0, &uip) == 0) 421 0, 0, &uip) == 0)
@@ -481,14 +461,13 @@ xfs_qm_scall_getqstat(
481 if (temppqip) 461 if (temppqip)
482 IRELE(pip); 462 IRELE(pip);
483 } 463 }
484 if (q) { 464 out->qs_incoredqs = q->qi_dquots;
485 out->qs_incoredqs = q->qi_dquots; 465 out->qs_btimelimit = q->qi_btimelimit;
486 out->qs_btimelimit = q->qi_btimelimit; 466 out->qs_itimelimit = q->qi_itimelimit;
487 out->qs_itimelimit = q->qi_itimelimit; 467 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
488 out->qs_rtbtimelimit = q->qi_rtbtimelimit; 468 out->qs_bwarnlimit = q->qi_bwarnlimit;
489 out->qs_bwarnlimit = q->qi_bwarnlimit; 469 out->qs_iwarnlimit = q->qi_iwarnlimit;
490 out->qs_iwarnlimit = q->qi_iwarnlimit; 470
491 }
492 return 0; 471 return 0;
493} 472}
494 473
@@ -509,13 +488,6 @@ xfs_qm_scall_getqstatv(
509 bool tempgqip = false; 488 bool tempgqip = false;
510 bool temppqip = false; 489 bool temppqip = false;
511 490
512 if (!xfs_sb_version_hasquota(&mp->m_sb)) {
513 out->qs_uquota.qfs_ino = NULLFSINO;
514 out->qs_gquota.qfs_ino = NULLFSINO;
515 out->qs_pquota.qfs_ino = NULLFSINO;
516 return 0;
517 }
518
519 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & 491 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
520 (XFS_ALL_QUOTA_ACCT| 492 (XFS_ALL_QUOTA_ACCT|
521 XFS_ALL_QUOTA_ENFD)); 493 XFS_ALL_QUOTA_ENFD));
@@ -523,11 +495,9 @@ xfs_qm_scall_getqstatv(
523 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; 495 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
524 out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino; 496 out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
525 497
526 if (q) { 498 uip = q->qi_uquotaip;
527 uip = q->qi_uquotaip; 499 gip = q->qi_gquotaip;
528 gip = q->qi_gquotaip; 500 pip = q->qi_pquotaip;
529 pip = q->qi_pquotaip;
530 }
531 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 501 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
532 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 502 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
533 0, 0, &uip) == 0) 503 0, 0, &uip) == 0)
@@ -562,19 +532,18 @@ xfs_qm_scall_getqstatv(
562 if (temppqip) 532 if (temppqip)
563 IRELE(pip); 533 IRELE(pip);
564 } 534 }
565 if (q) { 535 out->qs_incoredqs = q->qi_dquots;
566 out->qs_incoredqs = q->qi_dquots; 536 out->qs_btimelimit = q->qi_btimelimit;
567 out->qs_btimelimit = q->qi_btimelimit; 537 out->qs_itimelimit = q->qi_itimelimit;
568 out->qs_itimelimit = q->qi_itimelimit; 538 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
569 out->qs_rtbtimelimit = q->qi_rtbtimelimit; 539 out->qs_bwarnlimit = q->qi_bwarnlimit;
570 out->qs_bwarnlimit = q->qi_bwarnlimit; 540 out->qs_iwarnlimit = q->qi_iwarnlimit;
571 out->qs_iwarnlimit = q->qi_iwarnlimit; 541
572 }
573 return 0; 542 return 0;
574} 543}
575 544
576#define XFS_DQ_MASK \ 545#define XFS_QC_MASK \
577 (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) 546 (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK)
578 547
579/* 548/*
580 * Adjust quota limits, and start/stop timers accordingly. 549 * Adjust quota limits, and start/stop timers accordingly.
@@ -584,7 +553,7 @@ xfs_qm_scall_setqlim(
584 struct xfs_mount *mp, 553 struct xfs_mount *mp,
585 xfs_dqid_t id, 554 xfs_dqid_t id,
586 uint type, 555 uint type,
587 fs_disk_quota_t *newlim) 556 struct qc_dqblk *newlim)
588{ 557{
589 struct xfs_quotainfo *q = mp->m_quotainfo; 558 struct xfs_quotainfo *q = mp->m_quotainfo;
590 struct xfs_disk_dquot *ddq; 559 struct xfs_disk_dquot *ddq;
@@ -593,9 +562,9 @@ xfs_qm_scall_setqlim(
593 int error; 562 int error;
594 xfs_qcnt_t hard, soft; 563 xfs_qcnt_t hard, soft;
595 564
596 if (newlim->d_fieldmask & ~XFS_DQ_MASK) 565 if (newlim->d_fieldmask & ~XFS_QC_MASK)
597 return -EINVAL; 566 return -EINVAL;
598 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) 567 if ((newlim->d_fieldmask & XFS_QC_MASK) == 0)
599 return 0; 568 return 0;
600 569
601 /* 570 /*
@@ -633,11 +602,11 @@ xfs_qm_scall_setqlim(
633 /* 602 /*
634 * Make sure that hardlimits are >= soft limits before changing. 603 * Make sure that hardlimits are >= soft limits before changing.
635 */ 604 */
636 hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? 605 hard = (newlim->d_fieldmask & QC_SPC_HARD) ?
637 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : 606 (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_hardlimit) :
638 be64_to_cpu(ddq->d_blk_hardlimit); 607 be64_to_cpu(ddq->d_blk_hardlimit);
639 soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? 608 soft = (newlim->d_fieldmask & QC_SPC_SOFT) ?
640 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : 609 (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_softlimit) :
641 be64_to_cpu(ddq->d_blk_softlimit); 610 be64_to_cpu(ddq->d_blk_softlimit);
642 if (hard == 0 || hard >= soft) { 611 if (hard == 0 || hard >= soft) {
643 ddq->d_blk_hardlimit = cpu_to_be64(hard); 612 ddq->d_blk_hardlimit = cpu_to_be64(hard);
@@ -650,11 +619,11 @@ xfs_qm_scall_setqlim(
650 } else { 619 } else {
651 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); 620 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
652 } 621 }
653 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? 622 hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ?
654 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : 623 (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_hardlimit) :
655 be64_to_cpu(ddq->d_rtb_hardlimit); 624 be64_to_cpu(ddq->d_rtb_hardlimit);
656 soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? 625 soft = (newlim->d_fieldmask & QC_RT_SPC_SOFT) ?
657 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : 626 (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_softlimit) :
658 be64_to_cpu(ddq->d_rtb_softlimit); 627 be64_to_cpu(ddq->d_rtb_softlimit);
659 if (hard == 0 || hard >= soft) { 628 if (hard == 0 || hard >= soft) {
660 ddq->d_rtb_hardlimit = cpu_to_be64(hard); 629 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
@@ -667,10 +636,10 @@ xfs_qm_scall_setqlim(
667 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); 636 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
668 } 637 }
669 638
670 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? 639 hard = (newlim->d_fieldmask & QC_INO_HARD) ?
671 (xfs_qcnt_t) newlim->d_ino_hardlimit : 640 (xfs_qcnt_t) newlim->d_ino_hardlimit :
672 be64_to_cpu(ddq->d_ino_hardlimit); 641 be64_to_cpu(ddq->d_ino_hardlimit);
673 soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? 642 soft = (newlim->d_fieldmask & QC_INO_SOFT) ?
674 (xfs_qcnt_t) newlim->d_ino_softlimit : 643 (xfs_qcnt_t) newlim->d_ino_softlimit :
675 be64_to_cpu(ddq->d_ino_softlimit); 644 be64_to_cpu(ddq->d_ino_softlimit);
676 if (hard == 0 || hard >= soft) { 645 if (hard == 0 || hard >= soft) {
@@ -687,12 +656,12 @@ xfs_qm_scall_setqlim(
687 /* 656 /*
688 * Update warnings counter(s) if requested 657 * Update warnings counter(s) if requested
689 */ 658 */
690 if (newlim->d_fieldmask & FS_DQ_BWARNS) 659 if (newlim->d_fieldmask & QC_SPC_WARNS)
691 ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); 660 ddq->d_bwarns = cpu_to_be16(newlim->d_spc_warns);
692 if (newlim->d_fieldmask & FS_DQ_IWARNS) 661 if (newlim->d_fieldmask & QC_INO_WARNS)
693 ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); 662 ddq->d_iwarns = cpu_to_be16(newlim->d_ino_warns);
694 if (newlim->d_fieldmask & FS_DQ_RTBWARNS) 663 if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
695 ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); 664 ddq->d_rtbwarns = cpu_to_be16(newlim->d_rt_spc_warns);
696 665
697 if (id == 0) { 666 if (id == 0) {
698 /* 667 /*
@@ -702,24 +671,24 @@ xfs_qm_scall_setqlim(
702 * soft and hard limit values (already done, above), and 671 * soft and hard limit values (already done, above), and
703 * for warnings. 672 * for warnings.
704 */ 673 */
705 if (newlim->d_fieldmask & FS_DQ_BTIMER) { 674 if (newlim->d_fieldmask & QC_SPC_TIMER) {
706 q->qi_btimelimit = newlim->d_btimer; 675 q->qi_btimelimit = newlim->d_spc_timer;
707 ddq->d_btimer = cpu_to_be32(newlim->d_btimer); 676 ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer);
708 } 677 }
709 if (newlim->d_fieldmask & FS_DQ_ITIMER) { 678 if (newlim->d_fieldmask & QC_INO_TIMER) {
710 q->qi_itimelimit = newlim->d_itimer; 679 q->qi_itimelimit = newlim->d_ino_timer;
711 ddq->d_itimer = cpu_to_be32(newlim->d_itimer); 680 ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer);
712 } 681 }
713 if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { 682 if (newlim->d_fieldmask & QC_RT_SPC_TIMER) {
714 q->qi_rtbtimelimit = newlim->d_rtbtimer; 683 q->qi_rtbtimelimit = newlim->d_rt_spc_timer;
715 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); 684 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer);
716 } 685 }
717 if (newlim->d_fieldmask & FS_DQ_BWARNS) 686 if (newlim->d_fieldmask & QC_SPC_WARNS)
718 q->qi_bwarnlimit = newlim->d_bwarns; 687 q->qi_bwarnlimit = newlim->d_spc_warns;
719 if (newlim->d_fieldmask & FS_DQ_IWARNS) 688 if (newlim->d_fieldmask & QC_INO_WARNS)
720 q->qi_iwarnlimit = newlim->d_iwarns; 689 q->qi_iwarnlimit = newlim->d_ino_warns;
721 if (newlim->d_fieldmask & FS_DQ_RTBWARNS) 690 if (newlim->d_fieldmask & QC_RT_SPC_WARNS)
722 q->qi_rtbwarnlimit = newlim->d_rtbwarns; 691 q->qi_rtbwarnlimit = newlim->d_rt_spc_warns;
723 } else { 692 } else {
724 /* 693 /*
725 * If the user is now over quota, start the timelimit. 694 * If the user is now over quota, start the timelimit.
@@ -801,7 +770,7 @@ xfs_qm_log_quotaoff(
801 mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; 770 mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
802 spin_unlock(&mp->m_sb_lock); 771 spin_unlock(&mp->m_sb_lock);
803 772
804 xfs_mod_sb(tp, XFS_SB_QFLAGS); 773 xfs_log_sb(tp);
805 774
806 /* 775 /*
807 * We have to make sure that the transaction is secure on disk before we 776 * We have to make sure that the transaction is secure on disk before we
@@ -824,7 +793,7 @@ xfs_qm_scall_getquota(
824 struct xfs_mount *mp, 793 struct xfs_mount *mp,
825 xfs_dqid_t id, 794 xfs_dqid_t id,
826 uint type, 795 uint type,
827 struct fs_disk_quota *dst) 796 struct qc_dqblk *dst)
828{ 797{
829 struct xfs_dquot *dqp; 798 struct xfs_dquot *dqp;
830 int error; 799 int error;
@@ -848,28 +817,25 @@ xfs_qm_scall_getquota(
848 } 817 }
849 818
850 memset(dst, 0, sizeof(*dst)); 819 memset(dst, 0, sizeof(*dst));
851 dst->d_version = FS_DQUOT_VERSION; 820 dst->d_spc_hardlimit =
852 dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags); 821 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
853 dst->d_id = be32_to_cpu(dqp->q_core.d_id); 822 dst->d_spc_softlimit =
854 dst->d_blk_hardlimit = 823 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
855 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
856 dst->d_blk_softlimit =
857 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit));
858 dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); 824 dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
859 dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 825 dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
860 dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount); 826 dst->d_space = XFS_FSB_TO_B(mp, dqp->q_res_bcount);
861 dst->d_icount = dqp->q_res_icount; 827 dst->d_ino_count = dqp->q_res_icount;
862 dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer); 828 dst->d_spc_timer = be32_to_cpu(dqp->q_core.d_btimer);
863 dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer); 829 dst->d_ino_timer = be32_to_cpu(dqp->q_core.d_itimer);
864 dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns); 830 dst->d_ino_warns = be16_to_cpu(dqp->q_core.d_iwarns);
865 dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns); 831 dst->d_spc_warns = be16_to_cpu(dqp->q_core.d_bwarns);
866 dst->d_rtb_hardlimit = 832 dst->d_rt_spc_hardlimit =
867 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); 833 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit));
868 dst->d_rtb_softlimit = 834 dst->d_rt_spc_softlimit =
869 XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); 835 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit));
870 dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount); 836 dst->d_rt_space = XFS_FSB_TO_B(mp, dqp->q_res_rtbcount);
871 dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer); 837 dst->d_rt_spc_timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
872 dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns); 838 dst->d_rt_spc_warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
873 839
874 /* 840 /*
875 * Internally, we don't reset all the timers when quota enforcement 841 * Internally, we don't reset all the timers when quota enforcement
@@ -882,23 +848,23 @@ xfs_qm_scall_getquota(
882 dqp->q_core.d_flags == XFS_DQ_GROUP) || 848 dqp->q_core.d_flags == XFS_DQ_GROUP) ||
883 (!XFS_IS_PQUOTA_ENFORCED(mp) && 849 (!XFS_IS_PQUOTA_ENFORCED(mp) &&
884 dqp->q_core.d_flags == XFS_DQ_PROJ)) { 850 dqp->q_core.d_flags == XFS_DQ_PROJ)) {
885 dst->d_btimer = 0; 851 dst->d_spc_timer = 0;
886 dst->d_itimer = 0; 852 dst->d_ino_timer = 0;
887 dst->d_rtbtimer = 0; 853 dst->d_rt_spc_timer = 0;
888 } 854 }
889 855
890#ifdef DEBUG 856#ifdef DEBUG
891 if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || 857 if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
892 (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || 858 (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
893 (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && 859 (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
894 dst->d_id != 0) { 860 id != 0) {
895 if ((dst->d_bcount > dst->d_blk_softlimit) && 861 if ((dst->d_space > dst->d_spc_softlimit) &&
896 (dst->d_blk_softlimit > 0)) { 862 (dst->d_spc_softlimit > 0)) {
897 ASSERT(dst->d_btimer != 0); 863 ASSERT(dst->d_spc_timer != 0);
898 } 864 }
899 if ((dst->d_icount > dst->d_ino_softlimit) && 865 if ((dst->d_ino_count > dst->d_ino_softlimit) &&
900 (dst->d_ino_softlimit > 0)) { 866 (dst->d_ino_softlimit > 0)) {
901 ASSERT(dst->d_itimer != 0); 867 ASSERT(dst->d_ino_timer != 0);
902 } 868 }
903 } 869 }
904#endif 870#endif
@@ -908,26 +874,6 @@ out_put:
908} 874}
909 875
910STATIC uint 876STATIC uint
911xfs_qm_export_qtype_flags(
912 uint flags)
913{
914 /*
915 * Can't be more than one, or none.
916 */
917 ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
918 (FS_PROJ_QUOTA | FS_USER_QUOTA));
919 ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
920 (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
921 ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
922 (FS_USER_QUOTA | FS_GROUP_QUOTA));
923 ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
924
925 return (flags & XFS_DQ_USER) ?
926 FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
927 FS_PROJ_QUOTA : FS_GROUP_QUOTA;
928}
929
930STATIC uint
931xfs_qm_export_flags( 877xfs_qm_export_flags(
932 uint flags) 878 uint flags)
933{ 879{
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 7542bbeca6a1..6923905ab33d 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -64,19 +64,10 @@ xfs_fs_get_xstatev(
64 return xfs_qm_scall_getqstatv(mp, fqs); 64 return xfs_qm_scall_getqstatv(mp, fqs);
65} 65}
66 66
67STATIC int 67static unsigned int
68xfs_fs_set_xstate( 68xfs_quota_flags(unsigned int uflags)
69 struct super_block *sb,
70 unsigned int uflags,
71 int op)
72{ 69{
73 struct xfs_mount *mp = XFS_M(sb); 70 unsigned int flags = 0;
74 unsigned int flags = 0;
75
76 if (sb->s_flags & MS_RDONLY)
77 return -EROFS;
78 if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
79 return -ENOSYS;
80 71
81 if (uflags & FS_QUOTA_UDQ_ACCT) 72 if (uflags & FS_QUOTA_UDQ_ACCT)
82 flags |= XFS_UQUOTA_ACCT; 73 flags |= XFS_UQUOTA_ACCT;
@@ -91,16 +82,39 @@ xfs_fs_set_xstate(
91 if (uflags & FS_QUOTA_PDQ_ENFD) 82 if (uflags & FS_QUOTA_PDQ_ENFD)
92 flags |= XFS_PQUOTA_ENFD; 83 flags |= XFS_PQUOTA_ENFD;
93 84
94 switch (op) { 85 return flags;
95 case Q_XQUOTAON: 86}
96 return xfs_qm_scall_quotaon(mp, flags); 87
97 case Q_XQUOTAOFF: 88STATIC int
98 if (!XFS_IS_QUOTA_ON(mp)) 89xfs_quota_enable(
99 return -EINVAL; 90 struct super_block *sb,
100 return xfs_qm_scall_quotaoff(mp, flags); 91 unsigned int uflags)
101 } 92{
93 struct xfs_mount *mp = XFS_M(sb);
94
95 if (sb->s_flags & MS_RDONLY)
96 return -EROFS;
97 if (!XFS_IS_QUOTA_RUNNING(mp))
98 return -ENOSYS;
99
100 return xfs_qm_scall_quotaon(mp, xfs_quota_flags(uflags));
101}
102
103STATIC int
104xfs_quota_disable(
105 struct super_block *sb,
106 unsigned int uflags)
107{
108 struct xfs_mount *mp = XFS_M(sb);
109
110 if (sb->s_flags & MS_RDONLY)
111 return -EROFS;
112 if (!XFS_IS_QUOTA_RUNNING(mp))
113 return -ENOSYS;
114 if (!XFS_IS_QUOTA_ON(mp))
115 return -EINVAL;
102 116
103 return -EINVAL; 117 return xfs_qm_scall_quotaoff(mp, xfs_quota_flags(uflags));
104} 118}
105 119
106STATIC int 120STATIC int
@@ -131,7 +145,7 @@ STATIC int
131xfs_fs_get_dqblk( 145xfs_fs_get_dqblk(
132 struct super_block *sb, 146 struct super_block *sb,
133 struct kqid qid, 147 struct kqid qid,
134 struct fs_disk_quota *fdq) 148 struct qc_dqblk *qdq)
135{ 149{
136 struct xfs_mount *mp = XFS_M(sb); 150 struct xfs_mount *mp = XFS_M(sb);
137 151
@@ -141,14 +155,14 @@ xfs_fs_get_dqblk(
141 return -ESRCH; 155 return -ESRCH;
142 156
143 return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), 157 return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
144 xfs_quota_type(qid.type), fdq); 158 xfs_quota_type(qid.type), qdq);
145} 159}
146 160
147STATIC int 161STATIC int
148xfs_fs_set_dqblk( 162xfs_fs_set_dqblk(
149 struct super_block *sb, 163 struct super_block *sb,
150 struct kqid qid, 164 struct kqid qid,
151 struct fs_disk_quota *fdq) 165 struct qc_dqblk *qdq)
152{ 166{
153 struct xfs_mount *mp = XFS_M(sb); 167 struct xfs_mount *mp = XFS_M(sb);
154 168
@@ -160,13 +174,14 @@ xfs_fs_set_dqblk(
160 return -ESRCH; 174 return -ESRCH;
161 175
162 return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), 176 return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
163 xfs_quota_type(qid.type), fdq); 177 xfs_quota_type(qid.type), qdq);
164} 178}
165 179
166const struct quotactl_ops xfs_quotactl_operations = { 180const struct quotactl_ops xfs_quotactl_operations = {
167 .get_xstatev = xfs_fs_get_xstatev, 181 .get_xstatev = xfs_fs_get_xstatev,
168 .get_xstate = xfs_fs_get_xstate, 182 .get_xstate = xfs_fs_get_xstate,
169 .set_xstate = xfs_fs_set_xstate, 183 .quota_enable = xfs_quota_enable,
184 .quota_disable = xfs_quota_disable,
170 .rm_xquota = xfs_fs_rm_xquota, 185 .rm_xquota = xfs_fs_rm_xquota,
171 .get_dqblk = xfs_fs_get_dqblk, 186 .get_dqblk = xfs_fs_get_dqblk,
172 .set_dqblk = xfs_fs_set_dqblk, 187 .set_dqblk = xfs_fs_set_dqblk,
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 19cbda196369..f2449fd86926 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -685,7 +685,7 @@ xfs_blkdev_get(
685 mp); 685 mp);
686 if (IS_ERR(*bdevp)) { 686 if (IS_ERR(*bdevp)) {
687 error = PTR_ERR(*bdevp); 687 error = PTR_ERR(*bdevp);
688 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); 688 xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
689 } 689 }
690 690
691 return error; 691 return error;
@@ -1111,6 +1111,11 @@ xfs_fs_statfs(
1111 statp->f_files, 1111 statp->f_files,
1112 mp->m_maxicount); 1112 mp->m_maxicount);
1113 1113
1114 /* If sb_icount overshot maxicount, report actual allocation */
1115 statp->f_files = max_t(typeof(statp->f_files),
1116 statp->f_files,
1117 sbp->sb_icount);
1118
1114 /* make sure statp->f_ffree does not underflow */ 1119 /* make sure statp->f_ffree does not underflow */
1115 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1120 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1116 statp->f_ffree = max_t(__int64_t, ffree, 0); 1121 statp->f_ffree = max_t(__int64_t, ffree, 0);
@@ -1257,13 +1262,13 @@ xfs_fs_remount(
1257 * If this is the first remount to writeable state we 1262 * If this is the first remount to writeable state we
1258 * might have some superblock changes to update. 1263 * might have some superblock changes to update.
1259 */ 1264 */
1260 if (mp->m_update_flags) { 1265 if (mp->m_update_sb) {
1261 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1266 error = xfs_sync_sb(mp, false);
1262 if (error) { 1267 if (error) {
1263 xfs_warn(mp, "failed to write sb changes"); 1268 xfs_warn(mp, "failed to write sb changes");
1264 return error; 1269 return error;
1265 } 1270 }
1266 mp->m_update_flags = 0; 1271 mp->m_update_sb = false;
1267 } 1272 }
1268 1273
1269 /* 1274 /*
@@ -1293,8 +1298,9 @@ xfs_fs_remount(
1293 1298
1294/* 1299/*
1295 * Second stage of a freeze. The data is already frozen so we only 1300 * Second stage of a freeze. The data is already frozen so we only
1296 * need to take care of the metadata. Once that's done write a dummy 1301 * need to take care of the metadata. Once that's done sync the superblock
1297 * record to dirty the log in case of a crash while frozen. 1302 * to the log to dirty it in case of a crash while frozen. This ensures that we
1303 * will recover the unlinked inode lists on the next mount.
1298 */ 1304 */
1299STATIC int 1305STATIC int
1300xfs_fs_freeze( 1306xfs_fs_freeze(
@@ -1304,7 +1310,7 @@ xfs_fs_freeze(
1304 1310
1305 xfs_save_resvblks(mp); 1311 xfs_save_resvblks(mp);
1306 xfs_quiesce_attr(mp); 1312 xfs_quiesce_attr(mp);
1307 return xfs_fs_log_dummy(mp); 1313 return xfs_sync_sb(mp, true);
1308} 1314}
1309 1315
1310STATIC int 1316STATIC int
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 1743b9f8e23d..a0c8067cea6f 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -149,24 +149,6 @@ static struct ctl_table xfs_table[] = {
149 .extra2 = &xfs_params.inherit_noatim.max 149 .extra2 = &xfs_params.inherit_noatim.max
150 }, 150 },
151 { 151 {
152 .procname = "xfsbufd_centisecs",
153 .data = &xfs_params.xfs_buf_timer.val,
154 .maxlen = sizeof(int),
155 .mode = 0644,
156 .proc_handler = proc_dointvec_minmax,
157 .extra1 = &xfs_params.xfs_buf_timer.min,
158 .extra2 = &xfs_params.xfs_buf_timer.max
159 },
160 {
161 .procname = "age_buffer_centisecs",
162 .data = &xfs_params.xfs_buf_age.val,
163 .maxlen = sizeof(int),
164 .mode = 0644,
165 .proc_handler = proc_dointvec_minmax,
166 .extra1 = &xfs_params.xfs_buf_age.min,
167 .extra2 = &xfs_params.xfs_buf_age.max
168 },
169 {
170 .procname = "inherit_nosymlinks", 152 .procname = "inherit_nosymlinks",
171 .data = &xfs_params.inherit_nosym.val, 153 .data = &xfs_params.inherit_nosym.val,
172 .maxlen = sizeof(int), 154 .maxlen = sizeof(int),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fa3135b9bf04..eb90cd59a0ec 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -472,6 +472,7 @@ xfs_trans_apply_sb_deltas(
472 whole = 1; 472 whole = 1;
473 } 473 }
474 474
475 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
475 if (whole) 476 if (whole)
476 /* 477 /*
477 * Log the whole thing, the fields are noncontiguous. 478 * Log the whole thing, the fields are noncontiguous.
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 0a4d4ab6d9a9..75798412859a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -327,9 +327,10 @@ xfs_trans_read_buf_map(
327 return -EIO; 327 return -EIO;
328 } 328 }
329 329
330 if (tp) 330 if (tp) {
331 _xfs_trans_bjoin(tp, bp, 1); 331 _xfs_trans_bjoin(tp, bp, 1);
332 trace_xfs_trans_read_buf(bp->b_fspriv); 332 trace_xfs_trans_read_buf(bp->b_fspriv);
333 }
333 *bpp = bp; 334 *bpp = bp;
334 return 0; 335 return 0;
335 336